This is the merge of the upstream LTS release of 5.15.94 into the android13-5.15 branch. It contains the following commits: *5448b2fda8Merge 5.15.94 into android13-5.15-lts |\ | *e2c1a934fdLinux 5.15.94 | *17170acdc7Documentation/hw-vuln: Add documentation for Cross-Thread Return Predictions | *5122e0e443KVM: x86: Mitigate the cross-thread return address predictions bug | *8f12dcab90x86/speculation: Identify processors vulnerable to SMT RSB predictions | *e63c434de8drm/i915: Fix VBT DSI DVO port handling | *fc88c68381drm/i915: Initialize the obj flags for shmem objects | *2e557c8ca2drm/amdgpu/fence: Fix oops due to non-matching drm_sched init/fini | *3af734f3eaFix page corruption caused by racy check in __free_pages | *c94ce5ea68arm64: dts: meson-axg: Make mmc host controller interrupts level-sensitive | *b796c02df3arm64: dts: meson-g12-common: Make mmc host controller interrupts level-sensitive | *5d9b771f53arm64: dts: meson-gx: Make mmc host controller interrupts level-sensitive | *ac39dce119rtmutex: Ensure that the top waiter is always woken up | *86f7e42393powerpc/64s/interrupt: Fix interrupt exit race with security mitigation switch | *2907cf3f2eriscv: Fixup race condition on PG_dcache_clean in flush_icache_pte | *beb1cefa3cceph: flush cap releases when the session is flushed | *86733ab239clk: ingenic: jz4760: Update M/N/OD calculation algorithm | *239e927eb2usb: typec: altmodes/displayport: Fix probe pin assign check | *48aecce116usb: core: add quirk for Alcor Link AK9563 smartcard reader | *a8178bb1c7btrfs: free device in btrfs_close_devices for a single device filesystem | *8d13f2c3e2mptcp: be careful on subflow status propagation on errors | *25141fb411net: USB: Fix wrong-direction WARNING in plusb.c | *d1fba1e096cifs: Fix use-after-free in rdata->read_into_pages() | *1b83e7e174pinctrl: intel: Restore the pins that used to be in Direct IRQ mode | *f5f025b703spi: dw: Fix wrong FIFO level setting for long xfers | *71668706fbpinctrl: single: fix potential 
NULL dereference | *a2a1065739pinctrl: aspeed: Fix confusing types in return value | *99450163bcpinctrl: mediatek: Fix the drive register definition of some Pins | *9f0d2c2684ASoC: topology: Return -ENOMEM on memory allocation failure | *1a52ef89e3riscv: stacktrace: Fix missing the first frame | *5fb8154334ALSA: pci: lx6464es: fix a debug loop | *105ea562f6selftests: forwarding: lib: quote the sysctl values | *528e3f3a4brds: rds_rm_zerocopy_callback() use list_first_entry() | *48d6d8f2f6igc: Add ndo_tx_timeout support | *62ff7dd961net/mlx5: Serialize module cleanup with reload and remove | *95d2394f84net/mlx5: fw_tracer, Zero consumer index when reloading the tracer | *ab7f3f6a9dnet/mlx5: fw_tracer, Clear load bit when freeing string DBs buffers | *193528646enet/mlx5e: IPoIB, Show unknown speed instead of error | *7c6e8eb617net/mlx5: Bridge, fix ageing of peer FDB entries | *49ece61a07net/mlx5e: Update rx ring hw mtu upon each rx-fcs flag change | *31172267banet/mlx5e: Introduce the mlx5e_flush_rq function | *e4e4e93d31net/mlx5e: Move repeating clear_bit in mlx5e_rx_reporter_err_rq_cqe_recover | *3f18b9ed8cnet: mscc: ocelot: fix VCAP filters not matching on MAC with "protocol 802.1Q" | *6acb5d853bnet: dsa: mt7530: don't change PVC_EG_TAG when CPU port becomes VLAN-aware | *ca834a0178ice: Do not use WQ_MEM_RECLAIM flag for workqueue | *70d48c7992uapi: add missing ip/ipv6 header dependencies for linux/stddef.h | *3cec44036fionic: clean interrupt before enabling queue to avoid credit race | *fad12afe87net: phy: meson-gxl: use MMD access dummy stubs for GXL, internal PHY | *d23385a200bonding: fix error checking in bond_debug_reregister() | *11006d9d08net: phylink: move phy_device_free() to correctly release phy device | *fb022d7b1cxfrm: fix bug with DSCP copy to v6 from v4 tunnel | *6fe1ad42afRDMA/usnic: use iommu_map_atomic() under spin_lock() | *8f5fe1cd8eRDMA/irdma: Fix potential NULL-ptr-dereference | *1b4ef90cbcIB/IPoIB: Fix legacy IPoIB due to wrong number of 
queues | *5dc688fae6xfrm/compat: prevent potential spectre v1 gadget in xfrm_xlate32_attr() | *9bae58d58bIB/hfi1: Restore allocated resources on failed copyout | *558b1fa01cxfrm: compat: change expression for switch in xfrm_xlate64 | *238b38e89fcan: j1939: do not wait 250 ms if the same addr was already claimed | *d859184b60of/address: Return an error when no valid dma-ranges are found | *70f37b3118tracing: Fix poll() and select() do not work on per_cpu trace_pipe and trace_pipe_raw | *df01749503ALSA: hda/realtek: Enable mute/micmute LEDs on HP Elitebook, 645 G9 | *ca9d542203ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book2 Pro 360 | *706b6d86a6ALSA: emux: Avoid potential array out-of-bound in snd_emux_xg_control() | *731fc29de6ALSA: hda/realtek: Add Positivo N14KP6-TG | *b938059807btrfs: zlib: zero-initialize zlib workspace | *e65faa7e39btrfs: limit device extents to the device size | *2e4dd07fdamigrate: hugetlb: check for hugetlb shared PMD in node migration | *072e7412e8mm/migration: return errno when isolate_huge_page failed * |f977f92131Revert "nvmem: core: remove nvmem_config wp_gpio" * |787413edadMerge 5.15.93 into android13-5.15-lts |\| | *85d7786c66Linux 5.15.93 | *6e2fac197dbpf: Skip invalid kfunc call in backtrack_insn | *46c9088cabgfs2: Always check inode size of inline inodes | *8eb2e58a92gfs2: Cosmetic gfs2_dinode_{in,out} cleanup | *e4991910f1wifi: brcmfmac: Check the count value of channel spec to prevent out-of-bounds reads | *97ccfffcc0f2fs: fix to do sanity check on i_extra_isize in is_alive() | *64fa364ad3fbdev: smscufx: fix error handling code in ufx_usb_probe | *a77141a063ovl: Use "buf" flexible array for memcpy() destination | *1692fedd0ffs/ntfs3: Validate attribute data and valid sizes | *a5b9cb7276powerpc/imc-pmu: Revert nest_init_lock to being a mutex | *3691f43a09iio:adc:twl6030: Enable measurement of VAC | *8c84f50390bpf: Do not reject when the stack read size is different from the tracked scalar size | *14b6198abbbpf: 
Fix incorrect state pruning for <8B spill/fill | *575a9f6fefphy: qcom-qmp-combo: fix runtime suspend | *e58df87394phy: qcom-qmp-combo: fix broken power on | *368ea32e0aphy: qcom-qmp-usb: fix memleak on probe deferral | *2f27d3811aphy: qcom-qmp-combo: fix memleak on probe deferral | *0cb10ddab7phy: qcom-qmp-combo: disable runtime PM on unbind | *0ef5ffe116serial: 8250_dma: Fix DMA Rx rearm race | *e30328f599serial: 8250_dma: Fix DMA Rx completion race | *a5a171f61anvmem: core: fix cell removal on error | *6d9fa3ff65nvmem: core: remove nvmem_config wp_gpio | *adf80e072cnvmem: core: initialise nvmem->id early | *e3ebc3e23bdrm/i915: Fix potential bit_17 double-free | *997bed0f3cSquashfs: fix handling and sanity checking of xattr_ids count | *7a0cfaf9d4highmem: round down the address passed to kunmap_flush_on_unmap() | *5dbe1ebd56mm/swapfile: add cond_resched() in get_swap_pages() | *daf8241804fpga: stratix10-soc: Fix return value check in s10_ops_write_init() | *afd32b6831x86/debug: Fix stack recursion caused by wrongly ordered DR7 accesses | *066ecbf1a5kernel/irq/irqdomain.c: fix memory leak with using debugfs_lookup() | *481bf49f58usb: gadget: f_uac2: Fix incorrect increment of bNumEndpoints | *fdf40e5824mm: hugetlb: proc: check for hugetlb shared PMD in /proc/PID/smaps | *6c300351c5riscv: disable generation of unwind tables | *a5c275add9parisc: Wire up PTRACE_GETREGS/PTRACE_SETREGS for compat case | *a964decd13parisc: Fix return code of pdc_iodc_print() | *488eaf0625nvmem: qcom-spmi-sdam: fix module autoloading | *8569beb66fiio: imu: fxos8700: fix MAGN sensor scale and unit | *8aa5cdcfafiio: imu: fxos8700: remove definition FXOS8700_CTRL_ODR_MIN | *4112ba1ad5iio: imu: fxos8700: fix failed initialization ODR mode assignment | *abf7b2ba51iio: imu: fxos8700: fix incorrect ODR mode readback | *412757741ciio: imu: fxos8700: fix swapped ACCEL and MAGN channels readback | *34909532b1iio: imu: fxos8700: fix map label of channel type to MAGN sensor | *8346eb4987iio: imu: 
fxos8700: fix IMU data bits returned to user space | *7567cdf3ceiio: imu: fxos8700: fix incomplete ACCEL and MAGN channels readback | *6969852220iio: imu: fxos8700: fix ACCEL measurement range selection | *cdacfb2205iio:adc:twl6030: Enable measurements of VUSB, VBAT and others | *9988063dceiio: adc: berlin2-adc: Add missing of_node_put() in error path | *c691a5c0fdiio: hid: fix the retval in gyro_3d_capture_sample | *ef80a34699iio: hid: fix the retval in accel_3d_capture_sample | *c4eae85c73efi: Accept version 2 of memory attributes table | *710db82063ALSA: hda/realtek: Add Acer Predator PH315-54 | *3fbddf86d9watchdog: diag288_wdt: fix __diag288() inline assembly | *700dd5bc72watchdog: diag288_wdt: do not use stack buffers for hardware data | *21bc51e29enet: qrtr: free memory on error path in radix_tree_insert() | *dccbd062d7fbcon: Check font dimension limits | *5d7500d991Input: i8042 - add Clevo PCX0DX to i8042 quirk table | *fc9e27f3bavc_screen: move load of struct vc_data pointer in vcs_read() to avoid UAF | *9ba1188a71usb: gadget: f_fs: Fix unbalanced spinlock in __ffs_ep0_queue_wait | *fe86480e90usb: dwc3: qcom: enable vbus override when in OTG dr-mode | *a412fe7bafiio: adc: stm32-dfsdm: fill module aliases | *9944659398drm/amd/display: Fix timing not changning when freesync video is enabled | *a3967128bcnet/x25: Fix to not accept on connected socket | *396ea318e7platform/x86: gigabyte-wmi: add support for B450M DS3H WIFI-CF | *1577524633platform/x86: dell-wmi: Add a keymap for KEY_MUTE in type 0x0010 table | *540cea9f9bi2c: rk3x: fix a bunch of kernel-doc warnings | *0aaabdb900scsi: iscsi_tcp: Fix UAF during login when accessing the shost ipaddress | *17b738590bscsi: iscsi_tcp: Fix UAF during logout when accessing the shost ipaddress | *8cd0499f9cperf/x86/intel: Add Emerald Rapids | *7093515370scsi: target: core: Fix warning on RT kernels | *b7960f5436i2c: mxs: suppress probe-deferral error message | *b9b87fc34bi2c: designware-pci: Add new PCI IDs for AMD 
NAVI GPU | *d8fc0b5fb3efi: fix potential NULL deref in efi_mem_reserve_persistent | *f423c2efd5net: openvswitch: fix flow memory leak in ovs_flow_cmd_new | *7985028647virtio-net: Keep stop() to follow mirror sequence of open() | *5d884f9e80selftests: net: udpgso_bench_tx: Cater for pending datagrams zerocopy benchmarking | *63aa63af3aselftests: net: udpgso_bench: Fix racing bug between the rx/tx programs | *d41a3f9cc2selftests: net: udpgso_bench_rx/tx: Stop when wrong CLI args are provided | *5af98283e5selftests: net: udpgso_bench_rx: Fix 'used uninitialized' compiler warning | *89e0701e03ata: libata: Fix sata_down_spd_limit() when no link speed is reported | *9ab896775fcan: j1939: fix errant WARN_ON_ONCE in j1939_session_deactivate | *02d77d98e0igc: return an error if the mac type is unknown in igc_ptp_systim_to_hwtstamp() | *04a7355820riscv: kprobe: Fixup kernel panic when probing an illegal position | *206c367b6aip/ip6_gre: Fix non-point-to-point tunnel not generating IPv6 link local address | *90178bc0f2ip/ip6_gre: Fix changing addr gen mode not generating IPv6 link local address | *dfe2f0ea38net: phy: meson-gxl: Add generic dummy stubs for MMD register access | *b7398efe24squashfs: harden sanity check in squashfs_read_xattr_id_table | *89a69216f1netfilter: br_netfilter: disable sabotage_in hook after first suppression | *cdb444e73fdrm/i915/adlp: Fix typo for reference clock | *960f20d858drm/i915/guc: Fix locking when searching for a hung request | *c27e0eac56netrom: Fix use-after-free caused by accept on already connected socket | *511c922c5bblock, bfq: fix uaf for bfqq in bic_set_bfqq() | *a62c129dcbblock, bfq: replace 0/1 with false/true in bic apis | *37a744a068block/bfq-iosched.c: use "false" rather than "BLK_RW_ASYNC" | *2cd1e9c013net: phy: dp83822: Fix null pointer access on DP83825/DP83826 devices | *18c18c2110sfc: correctly advertise tunneled IPv6 segmentation | *878b06f60adpaa2-eth: execute xdp_do_flush() before napi_complete_done() | 
*3b5774cd6bdpaa_eth: execute xdp_do_flush() before napi_complete_done() | *5a7040a649virtio-net: execute xdp_do_flush() before napi_complete_done() | *94add5b272qede: execute xdp_do_flush() before napi_complete_done() | *a273f8e3abice: Prevent set_channel from changing queues while RDMA active | *b432e183c2fix "direction" argument of iov_iter_kvec() | *d8b8306e96fix iov_iter_bvec() "direction" argument | *389c7c0ef9READ is "data destination", not source... | *7a3649bf5bWRITE is "data source", not destination... | *83cc6a7bb7vhost/net: Clear the pending messages when the backend is removed | *7c7d344bc3scsi: Revert "scsi: core: map PQ=1, PDT=other values to SCSI_SCAN_TARGET_PRESENT" | *4b199dc094drm/vc4: hdmi: make CEC adapter name unique | *dc1f8ab25aarm64: dts: imx8mm: Fix pad control for UART1_DTE_RX | *c681d7a4edbpf, sockmap: Check for any of tcp_bpf_prots when cloning a listener | *34ad5d8885bpf: Fix to preserve reg parent/live fields when copying range info | *7b86f9ab56bpf: Support <8-byte scalar spill and refill | *1b9256c962ALSA: hda/via: Avoid potential array out-of-bound in add_secret_dac_path() | *b7abeb6916bpf: Fix a possible task gone issue with bpf_send_signal[_thread]() helpers | *cfcc2390dbASoC: Intel: bytcr_wm5102: Drop reference count of ACPI device after use | *b4b204565aASoC: Intel: bytcr_rt5640: Drop reference count of ACPI device after use | *1f1e7635c5ASoC: Intel: bytcr_rt5651: Drop reference count of ACPI device after use | *41d323c352ASoC: Intel: bytcht_es8316: Drop reference count of ACPI device after use | *6a9990e1d9ASoC: Intel: bytcht_es8316: move comment to the right place | *ffcdf35455ASoC: Intel: boards: fix spelling in comments | *bd0b17ab1bbus: sunxi-rsb: Fix error handling in sunxi_rsb_init() | *5f4543c938firewire: fix memory leak for payload of request subaction to IEC 61883-1 FCP region * |5020746bffMerge 5.15.92 into android13-5.15-lts |\| | *e515b9902fLinux 5.15.92 | *c7caf669b8net: mctp: purge receive queues on sk destruction 
| *046de74f9anet: fix NULL pointer in skb_segment_list | *7ab3376703selftests: Provide local define of __cpuid_count() | *e92e311cedselftests/vm: remove ARRAY_SIZE define from individual tests | *c9e52db900tools: fix ARRAY_SIZE defines in tools and selftests hdrs | *c1aa0dd52dBluetooth: fix null ptr deref on hci_sync_conn_complete_evt | *02e61196c5ACPI: processor idle: Practically limit "Dummy wait" workaround to old Intel systems | *79dd676b44extcon: usbc-tusb320: fix kernel-doc warning | *c2bd60ef20ext4: fix bad checksum after online resize | *4cd1e18bc0cifs: fix return of uninitialized rc in dfs_cache_update_tgthint() | *43acd767bddmaengine: imx-sdma: Fix a possible memory leak in sdma_transfer_init | *a54c5ad007HID: playstation: sanity check DualSense calibration data. | *6d7686cc11blk-cgroup: fix missing pd_online_fn() while activating policy | *2144859229erofs/zmap.c: Fix incorrect offset calculation | *0dfef50313bpf: Skip task with pid=1 in send_signal_common() | *e8bb772f74firmware: arm_scmi: Clear stale xfer->hdr.status | *80cb9f1a76arm64: dts: imx8mq-thor96: fix no-mmc property for SDHCI | *162fad24d2arm64: dts: freescale: Fix pca954x i2c-mux node names | *82ad105e1aARM: dts: vf610: Fix pca9548 i2c-mux node names | *5aee5f33e0ARM: dts: imx: Fix pca9547 i2c-mux node name * |7e0097918fRevert "scsi: ufs: core: Fix devfreq deadlocks" * |6ce0fcdcc2Revert "thermal/core: Rename 'trips' to 'num_trips'" * |49a5232dfbRevert "thermal: Validate new state in cur_state_store()" * |be0ca2fc43Revert "thermal/core: fix error code in __thermal_cooling_device_register()" * |9617a003ccRevert "thermal: core: call put_device() only after device_register() fails" * |ccb2c48531Revert "cpufreq: governor: Use kobject release() method to free dbs_data" * |0108f014a5Revert "gpio: use raw spinlock for gpio chip shadowed data" * |1d2449f6beRevert "gpio: mxc: Protect GPIO irqchip RMW with bgpio spinlock" * |5f51aedcbaRevert "gpio: mxc: Unlock on error path in mxc_flip_edge()" * 
|7622c50ba6Merge 5.15.91 into android13-5.15-lts |\| | *9cf4111cdfLinux 5.15.91 | *14cc13e433perf/x86/amd: fix potential integer overflow on shift of a int | *033636b322netfilter: conntrack: unify established states for SCTP paths | *0b08201158x86/i8259: Mark legacy PIC interrupts with IRQ_LEVEL | *b577400367block: fix and cleanup bio_check_ro | *1d152437e4kbuild: Allow kernel installation packaging to override pkg-config | *a196468858cpufreq: governor: Use kobject release() method to free dbs_data | *7c513ced0dcpufreq: Move to_gov_attr_set() to cpufreq.h | *cf7a08622dRevert "Input: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode" | *53c5d61198tools: gpio: fix -c option of gpio-event-mon | *a7d1a303fftreewide: fix up files incorrectly marked executable | *046fe53907net: mdio-mux-meson-g12a: force internal PHY off on mux switch | *86bdccde78net/tg3: resolve deadlock in tg3_reset_task() during EEH | *4364bf79d8thermal: intel: int340x: Add locking to int340x_thermal_get_trip_type() | *e69c3a0d9dnet: mctp: mark socks as dead on unhash, prevent re-add | *954cc215cdnet: ravb: Fix possible hang if RIS2_QFF1 happen | *0f7218bf0anet: ravb: Fix lack of register setting after system resumed for Gen3 | *3db4ca2938ravb: Rename "no_ptp_cfg_active" and "ptp_cfg_active" variables | *621f296f11gpio: mxc: Unlock on error path in mxc_flip_edge() | *071a839286nvme: fix passthrough csi check | *614471b7f7riscv/kprobe: Fix instruction simulation of JALR | *3391bd4235sctp: fail if no bound addresses can be used for a given scope | *b0784860e1net/sched: sch_taprio: do not schedule in taprio_reset() | *d2d3ab1b1dnetrom: Fix use-after-free of a listening socket. 
| *9df5ab02c6netfilter: conntrack: fix vtag checks for ABORT/SHUTDOWN_COMPLETE | *ca3cf94776ipv4: prevent potential spectre v1 gadget in fib_metrics_match() | *d50e7348b4ipv4: prevent potential spectre v1 gadget in ip_metrics_convert() | *ead06e3449netlink: annotate data races around sk_state | *c4eb423c6bnetlink: annotate data races around dst_portid and dst_group | *fac9b69a93netlink: annotate data races around nlk->portid | *8a13595600netfilter: nft_set_rbtree: skip elements in transaction from garbage collection | *2bf1435fa1netfilter: nft_set_rbtree: Switch to node list walk for overlap detection | *e481654426drm/i915/selftest: fix intel_selftest_modify_policy argument types | *66689a72banet: fix UaF in netns ops registration error path | *41b74e95f2netlink: prevent potential spectre v1 gadgets | *2f29d780bdi2c: designware: use casting of u64 in clock multiplication to avoid overflow | *b03f7ed9afscsi: ufs: core: Fix devfreq deadlocks | *858d7e9218net: mana: Fix IRQ name - add PCI and queue number | *bff5243bd3EDAC/qcom: Do not pass llcc_driv_data as edac_device_ctl_info's pvt_info | *5eedf4568dEDAC/device: Respect any driver-supplied workqueue polling value | *4b7dfd0a68ARM: 9280/1: mm: fix warning on phys_addr_t to void pointer assignment | *7807871f28ipv6: fix reachability confirmation with proxy_ndp | *f9a22f6fa1thermal: intel: int340x: Protect trip temperature from concurrent updates | *036093c08dKVM: arm64: GICv4.1: Fix race with doorbell on VPE activation/deactivation | *c56683c062KVM: x86/vmx: Do not skip segment attributes if unusable bit is set | *e91308e637ovl: fail on invalid uid/gid mapping at copy up | *33a9657d67ksmbd: limit pdu length size according to connection status | *8d83a758eeksmbd: downgrade ndr version error message to debug | *87a7f38a90ksmbd: do not sign response to session request for guest login | *4210c3555dksmbd: add max connections parameter | *cc6742b160ksmbd: add smbd max io size parameter | *3c8a5648a5i2c: mv64xxx: Add 
atomic_xfer method to driver | *e619ab4fb3i2c: mv64xxx: Remove shutdown method from driver | *4b83bc6f87cifs: Fix oops due to uncleared server->smbd_conn in reconnect | *89042d3d85ftrace/scripts: Update the instructions for ftrace-bisect.sh | *592ba7116ftrace_events_hist: add check for return value of 'create_hist_field' | *b0af180514tracing: Make sure trace_printk() can output as soon as it can be used | *91135d7233module: Don't wait for GOING modules | *85ee9919adKVM: SVM: fix tsc scaling cache logic | *f0227eca97scsi: hpsa: Fix allocation size for scsi_host_alloc() | *e5af9a458adrm/amdgpu: complete gfxoff allow signal during suspend without delay | *62b9e9f921Bluetooth: hci_sync: cancel cmd_timer if hci_open failed | *21998acd31exit: Use READ_ONCE() for all oops/warn limit reads | *e82b1598ebdocs: Fix path paste-o for /sys/kernel/warn_count | *1c51698ad6panic: Expose "warn_count" to sysfs | *0691ddae56panic: Introduce warn_limit | *7b98914a6cpanic: Consolidate open-coded panic_on_warn checks | *fc636b1362exit: Allow oops_limit to be disabled | *339f8a8e52exit: Expose "oops_count" to sysfs | *f80fb0001fexit: Put an upper limit on how often we can oops | *2857ce7f47panic: Separate sysctl logic from CONFIG_SMP | *e156d4dcb0ia64: make IA64_MCA_RECOVERY bool instead of tristate | *9024f77224csky: Fix function name in csky_alignment() and die() | *2ea497d153h8300: Fix build errors from do_exit() to make_task_dead() transition | *a452ca0228hexagon: Fix function name in die() | *3b39f47474objtool: Add a missing comma to avoid string concatenation | *39a26d8721exit: Add and use make_task_dead. 
| *b5c1acaa43kasan: no need to unset panic_on_warn in end_report() | *b5c967dc68ubsan: no need to unset panic_on_warn in ubsan_epilogue() | *e4cd210032panic: unset panic_on_warn inside panic() | *191f1f1f6akernel/panic: move panic sysctls to its own file | *654f6e8512sysctl: add a new register_sysctl_init() interface | *3aa991cde9fs: reiserfs: remove useless new_opts in reiserfs_remount | *d830531f8fx86: ACPI: cstate: Optimize C3 entry on AMD CPUs | *1f54762231drm/i915: Remove unused variable | *6e10127093Revert "selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID" | *619ee31b96drm/i915: Allow switching away via vga-switcheroo if uninitialized | *ea435ba9ebfirmware: coreboot: Check size of table entry and use flex-array | *a4e70bcf2elockref: stop doing cpu_relax in the cmpxchg loop | *b0ee61f5eeplatform/x86: asus-nb-wmi: Add alternate mapping for KEY_SCREENLOCK | *e8d2f7f566platform/x86: touchscreen_dmi: Add info for the CSL Panther Tab HD | *2e0a8bacber8152: add vendor/device ID pair for Microsoft Devkit | *d4b717e34dscsi: hisi_sas: Set a port invalid only if there are no devices attached when refreshing port id | *e15750aa28KVM: s390: interrupt: use READ_ONCE() before cmpxchg() | *9300c65207spi: spidev: remove debug messages that access spidev->spi without locking | *48ff5d3812ASoC: fsl-asoc-card: Fix naming of AC'97 CODEC widgets | *5001ffb31dASoC: fsl_ssi: Rename AC'97 streams to avoid collisions with AC'97 CODEC | *b76120e206cpufreq: armada-37xx: stop using 0 as NULL pointer | *eda26fa856perf/x86/intel/uncore: Add Emerald Rapids | *544f9d4e9dperf/x86/msr: Add Emerald Rapids | *b1eb964d78s390: expicitly align _edata and _end symbols on page boundary | *fb45ec279bs390/debug: add _ASM_S390_ prefix to header guard | *cd488abed9drm: Add orientation quirk for Lenovo ideapad D330-10IGL | *ff7ab370b8net: usb: cdc_ether: add support for Thales Cinterion PLS62-W modem | *d6935084e4ASoC: fsl_micfil: Correct the number of steps on SX controls | 
*ac07316b2dcpufreq: Add SM6375 to cpufreq-dt-platdev blocklist | *f0e6dcae14kcsan: test: don't put the expect array on the stack | *c51c0b3754cpufreq: Add Tegra234 to cpufreq-dt-platdev blocklist | *28e4e8ca9escsi: iscsi: Fix multiple iSCSI session unbind events sent to userspace | *14b1df2004tcp: fix rate_app_limited to default to 1 | *120b8e527enet: stmmac: enable all safety features by default | *a7d736cc3cthermal: core: call put_device() only after device_register() fails | *ed08f958e4thermal/core: fix error code in __thermal_cooling_device_register() | *108a6f91e2thermal: Validate new state in cur_state_store() | *bd0ea77edfthermal/core: Rename 'trips' to 'num_trips' | *521c6ebd4fthermal/core: Remove duplicate information when an error occurs | *6504afa263net: dsa: microchip: ksz9477: port map correction in ALU table entry register | *18346db185selftests/net: toeplitz: fix race on tpacket_v3 block close | *caa28c7c83driver core: Fix test_async_probe_init saves device in wrong array | *89c62cee5dw1: fix WARNING after calling w1_process() | *3d0eafe413w1: fix deadloop in __w1_remove_master_device() | *7701a4bd45device property: fix of node refcount leak in fwnode_graph_get_next_endpoint() | *ed0d8f731eptdma: pt_core_execute_cmd() should use spinlock | *29e9c67bf3octeontx2-pf: Fix the use of GFP_KERNEL in atomic context on rt | *03bff5819atcp: avoid the lookup process failing to get sk in ehash table | *5bd69d2ea8nvme-pci: fix timeout request state check | *39178dfe86drm/amd/display: fix issues with driver unload | *9a5a537e14phy: phy-can-transceiver: Skip warning if no "max-bitrate" | *4095065b59dmaengine: xilinx_dma: call of_node_put() when breaking out of for_each_child_of_node() | *5bd3c1c1bccifs: fix potential deadlock in cache_refresh_path() | *1a2a47b85cHID: betop: check shape of output reports | *b2a7309743l2tp: prevent lockdep issue in l2tp_tunnel_register() | *edf0e509cevirtio-net: correctly enable callback during start_xmit | *d3401c7624net: macb: fix 
PTP TX timestamp failure due to packet padding | *71c6019655dmaengine: Fix double increment of client_count in dma_chan_get() | *1e7919f0b1drm/panfrost: fix GENERIC_ATOMIC64 dependency | *a1b3e50e21net: mlx5: eliminate anonymous module_init & module_exit | *09e3fb6f53net/mlx5: E-switch, Fix setting of reserved fields on MODIFY_SCHEDULING_ELEMENT | *01a6e10810net: ipa: disable ipa interrupt during suspend | *98aec50ff7Bluetooth: Fix possible deadlock in rfcomm_sk_state_change | *0e59f60b74usb: gadget: f_fs: Ensure ep0req is dequeued before free_request | *ae8e136bcausb: gadget: f_fs: Prevent race during ffs_ep0_queue_wait | *f25cd2b731HID: revert CHERRY_MOUSE_000C quirk | *39483511fdpinctrl: rockchip: fix mux route data for rk3568 | *1dae88a0b4net: stmmac: fix invalid call to mdiobus_get_phy() | *6716838bf8HID: check empty report_list in bigben_probe() | *2b49568254HID: check empty report_list in hid_validate_values() | *ad67de330dnet: mdio: validate parameter addr in mdiobus_get_phy() | *4869129379net: usb: sr9700: Handle negative len | *2827c4eb42octeontx2-pf: Avoid use of GFP_KERNEL in atomic context | *77e8ed776cl2tp: close all race conditions in l2tp_tunnel_register() | *af22d2c0b4l2tp: convert l2tp_tunnel_list to idr | *22c7d45ca3l2tp: Don't sleep and disable BH under writer-side sk_callback_lock | *87d9205d9al2tp: Serialize access to sk_user_data with sk_callback_lock | *c53acbf2fanet/sched: sch_taprio: fix possible use-after-free | *40516d042bnet: stmmac: Fix queue statistics reading | *620aa67f80pinctrl: rockchip: fix reading pull type on rk3568 | *ddca674af1pinctrl/rockchip: add error handling for pull/drive register getters | *259ab8fb8cpinctrl/rockchip: Use temporary variable for struct device | *8cbf932c5cwifi: rndis_wlan: Prevent buffer overflow in rndis_query_oid | *f792d26e5cgpio: mxc: Always set GPIOs used as interrupt source to INPUT mode | *8335f877efgpio: mxc: Protect GPIO irqchip RMW with bgpio spinlock | *fb4fb3d267gpio: use raw spinlock for 
gpio chip shadowed data | *52e3eebfe6sch_htb: Avoid grafting on htb_destroy_class_offload when destroying htb | *8232e5a84dnet: enetc: avoid deadlock in enetc_tx_onestep_tstamp() | *95347e41canet: wan: Add checks for NULL for utdm in undo_uhdlc_init and unmap_si_regs | *7f129927fenet: nfc: Fix use-after-free in local_cleanup() | *397aaac884phy: rockchip-inno-usb2: Fix missing clk_disable_unprepare() in rockchip_usb2phy_power_on() | *01bdcc73dbbpf: Fix pointer-leak due to insufficient speculative store bypass mitigation | *261e2f12b6amd-xgbe: Delay AN timeout during KR training | *a8cf4af544amd-xgbe: TX Flow Ctrl Registers are h/w ver dependent | *8e897cb674ARM: dts: at91: sam9x60: fix the ddr clock for sam9x60 | *0a27dcd534NFSD: fix use-after-free in nfsd4_ssc_setup_dul() | *24af570c99phy: ti: fix Kconfig warning and operator precedence | *631fc36685arm64: dts: qcom: msm8992-libra: Fix the memory map | *dda20ffec8arm64: dts: qcom: msm8992-libra: Add CPU regulators | *37ba5e9293arm64: dts: qcom: msm8992: Don't use sfpb mutex | *bab87524f6PM: AVS: qcom-cpr: Fix an error handling path in cpr_probe() | *b7a479c764affs: initialize fsdata in affs_truncate() | *623d111689IB/hfi1: Remove user expected buffer invalidate race | *47d5fc0dcdIB/hfi1: Immediately remove invalid memory from hardware | *85caef2cfdIB/hfi1: Fix expected receive setup error exit issues | *cb193984d4IB/hfi1: Reserve user expected TIDs | *891ddfae39IB/hfi1: Reject a zero-length user expected buffer | *362c948972RDMA/core: Fix ib block iterator counter overflow | *e26c571c3btomoyo: fix broken dependency on *.conf.default | *7dfe83ecc3firmware: arm_scmi: Harden shared memory access in fetch_notification | *a653dbb70cfirmware: arm_scmi: Harden shared memory access in fetch_response | *caffa7fed1EDAC/highbank: Fix memory leak in highbank_mc_probe() | *95de286200reset: uniphier-glue: Fix possible null-ptr-deref | *4773a8cf9areset: uniphier-glue: Use reset_control_bulk API | *7b33accc8fsoc: imx8m: Fix 
incorrect check for of_clk_get_by_name() | *f07427f8d9arm64: dts: imx8mm-venice-gw7901: fix USB2 controller OC polarity | *c4cb73febeHID: intel_ish-hid: Add check for ishtp_dma_tx_map | *25f97c9883ARM: imx: add missing of_node_put() | *3e9d79ded9arm64: dts: imx8mm-beacon: Fix ecspi2 pinmux | *5381350761ARM: dts: imx6qdl-gw560x: Remove incorrect 'uart-has-rtscts' | *0e4bba1656ARM: dts: imx7d-pico: Use 'clock-frequency' | *108cf4c6d5ARM: dts: imx6ul-pico-dwarf: Use 'clock-frequency' | *207c9e64edarm64: dts: imx8mp-phycore-som: Remove invalid PMIC property | *7ce380fe75dmaengine: ti: k3-udma: Do conditional decrement of UDMA_CHAN_RT_PEER_BCNT_REG | *edba9b7a70memory: mvebu-devbus: Fix missing clk_disable_unprepare in mvebu_devbus_probe() | *e66f6949damemory: atmel-sdramc: Fix missing clk_disable_unprepare in atmel_ramc_probe() | *eda11ab556memory: tegra: Remove clients SID override programming * |cab35cbd71Revert "xhci: Add update_hub_device override for PCI xHCI hosts" * |29e8f224d8Revert "xhci: Detect lpm incapable xHC USB3 roothub ports from ACPI tables" * |5739b27e8fRevert "xhci: Add a flag to disable USB3 lpm on a xhci root port level." 
* |5b60fdf2e0Merge 5.15.90 into android13-5.15-lts |\| | *aabd5ba7e9Linux 5.15.90 | *4b6f8263e9io_uring/rw: remove leftover debug statement | *b10acfcd61io_uring/rw: ensure kiocb_end_write() is always called | *124fb13cc7io_uring: fix double poll leak on repolling | *e944f1e37bio_uring: Clean up a false-positive warning from GCC 9.3.0 | *940e8922c1mm/khugepaged: fix collapse_pte_mapped_thp() to allow anon_vma | *e83cc8a780soc: qcom: apr: Make qcom,protection-domain optional again | *982c8b1e95Revert "wifi: mac80211: fix memory leak in ieee80211_if_add()" | *40a4797e08block: mq-deadline: Rename deadline_is_seq_writes() | *3abf10b4c4net/mlx5: fix missing mutex_unlock in mlx5_fw_fatal_reporter_err_work() | *1aab00aa41net/ulp: use consistent error code when blocking ULP | *2e4c95a404io_uring/net: fix fast_iov assignment in io_setup_async_msg() | *311b298a33io_uring: io_kiocb_update_pos() should not touch file for non -1 offset | *487a086595tracing: Use alignof__(struct {type b;}) instead of offsetof() | *430443f856x86/fpu: Use _Alignof to avoid undefined behavior in TYPE_ALIGN | *f114717dfaRevert "drm/amdgpu: make display pinning more flexible (v2)" | *7a993c1be5efi: rt-wrapper: Add missing include | *de2af657caarm64: efi: Execute runtime services from a dedicated stack | *9cca110cf8fs/ntfs3: Fix attr_punch_hole() null pointer derenference | *d4d112e5c4drm/amdgpu: drop experimental flag on aldebaran | *c82fa690dadrm/amd/display: Fix COLOR_SPACE_YCBCR2020_TYPE matrix | *88c3375224drm/amd/display: Calculate output_color_space after pixel encoding adjustment | *87e605b161drm/amd/display: Fix set scaling doesn's work | *8687b8cdc3drm/i915/display: Check source height is > 0 | *5d96179166drm/i915: re-disable RC6p on Sandy Bridge | *e9a7ec188bmei: me: add meteor lake point M DID | *eb0421d90fgsmi: fix null-deref in gsmi_get_variable | *b8d99cda52serial: atmel: fix incorrect baudrate setup | *b85498385aserial: amba-pl011: fix high priority character transmission in rs486 mode 
| *0f150134dddmaengine: idxd: Let probe fail when workqueue cannot be enabled | *1e8c127c2edmaengine: tegra210-adma: fix global intr clear | *473e2281f7dmaengine: lgm: Move DT parsing after initialization | *73337724cbserial: pch_uart: Pass correct sg to dma_unmap_sg() | *4307a41cbcdt-bindings: phy: g12a-usb3-pcie-phy: fix compatible string documentation | *c9d55f564adt-bindings: phy: g12a-usb2-phy: fix compatible string documentation | *78aa45bb7ausb-storage: apply IGNORE_UAS only for HIKSEMI MD202 on RTL9210 | *a69c8dfb85usb: gadget: f_ncm: fix potential NULL ptr deref in ncm_bitrate() | *1ab67e87b1usb: gadget: g_webcam: Send color matching descriptor per frame | *b08167d8f0usb: typec: altmodes/displayport: Fix pin assignment calculation | *7fb1322e7ausb: typec: altmodes/displayport: Add pin assignment helper | *59f9ee3796usb: typec: tcpm: Fix altmode re-registration causes sysfs create fail | *a1c8a5c2f8usb: host: ehci-fsl: Fix module alias | *f073d10cd5usb: cdns3: remove fetched trb from cache before dequeuing | *73f4bde973USB: serial: cp210x: add SCALANCE LPE-9000 device id | *a2e075f401USB: gadgetfs: Fix race between mounting and unmounting | *2da67bff29tty: fix possible null-ptr-defer in spk_ttyio_release | *cb53a3366etty: serial: qcom-geni-serial: fix slab-out-of-bounds on RX FIFO buffer | *f322dd2e4astaging: mt7621-dts: change some node hex addresses to lower case | *6508788b2cbpf: restore the ebpf program ID for BPF_AUDIT_UNLOAD and PERF_BPF_EVENT_PROG_UNLOAD | *7b122c33bdriscv: dts: sifive: fu740: fix size of pcie 32bit memory | *701f9c3da6thunderbolt: Use correct function to calculate maximum USB3 link rate | *5b1b03a3d3cifs: do not include page data when checking signature | *64287cd456btrfs: fix race between quota rescan and disable leading to NULL pointer deref | *f2e0e1615dbtrfs: do not abort transaction on failure to write log tree when syncing log | *f653abe619mmc: sdhci-esdhc-imx: correct the tuning start tap and step setting | *9881436f01mmc: 
sunxi-mmc: Fix clock refcount imbalance during unbind | *33bd0db750ACPI: PRM: Check whether EFI runtime is available | *87e1ee6058comedi: adv_pci1760: Fix PWM instruction handling | *b5d24a8e4ausb: core: hub: disable autosuspend for TI TUSB8041 | *61a0890cb9misc: fastrpc: Fix use-after-free race condition for maps | *1b7b7bb400misc: fastrpc: Don't remove map on creater_process and device_release | *e7e41fcf90USB: misc: iowarrior: fix up header size for USB_DEVICE_ID_CODEMERCS_IOW100 | *f3de34d90dstaging: vchiq_arm: fix enum vchiq_status return types | *16d09c4bc9USB: serial: option: add Quectel EM05CN modem | *34d769f0c6USB: serial: option: add Quectel EM05CN (SG) modem | *768d56ed24USB: serial: option: add Quectel EC200U modem | *829916f069USB: serial: option: add Quectel EM05-G (RS) modem | *eb8808f769USB: serial: option: add Quectel EM05-G (CS) modem | *6e0430db19USB: serial: option: add Quectel EM05-G (GR) modem | *f01aefe374prlimit: do_prlimit needs to have a speculation check | *418e2c756dxhci: Detect lpm incapable xHC USB3 roothub ports from ACPI tables | *10cb7d53beusb: acpi: add helper to check port lpm capability using acpi _DSM | *1818e2a97dxhci: Add a flag to disable USB3 lpm on a xhci root port level. 
| *8911ff7963xhci: Add update_hub_device override for PCI xHCI hosts | *c462ac871fxhci: Fix null pointer dereference when host dies | *f39c813af0usb: xhci: Check endpoint is valid before dereferencing it | *0f175cebc4xhci-pci: set the dma max_seg_size | *89a410dbd0io_uring/rw: defer fsnotify calls to task context | *05d69b372bio_uring: do not recalculate ppos unnecessarily | *ff8a070253io_uring: update kiocb->ki_pos at execution time | *b7958caf41io_uring: remove duplicated calls to io_kiocb_ppos | *86e2d6901aio_uring: ensure that cached task references are always put on exit | *30b9068934io_uring: fix async accept on O_NONBLOCK sockets | *a79b13f249io_uring: allow re-poll if we made progress | *3c1a3d0269io_uring: support MSG_WAITALL for IORING_OP_SEND(MSG) | *390b881631io_uring: add flag for disabling provided buffer recycling | *9b7b0f2116io_uring: ensure recv and recvmsg handle MSG_WAITALL correctly | *cdc68e714dio_uring: improve send/recv error handling | *ccf06b5a98io_uring: pass in EPOLL_URING_WAKE for eventfd signaling and wakeups | *77baf39227eventfd: provide a eventfd_signal_mask() helper | *a2d8ff00a7eventpoll: add EPOLL_URING_WAKE poll wakeup flag | *a9aa4aa7a5io_uring: don't gate task_work run on TIF_NOTIFY_SIGNAL | *bd9a23a4bbhugetlb: unshare some PMDs when splitting VMAs | *393d9e3ed1drm/amd: Delay removal of the firmware framebuffer | *865e244e06drm/amdgpu: disable runtime pm on several sienna cichlid cards(v2) | *560373fb1eALSA: hda/realtek: fix mute/micmute LEDs don't work for a HP platform | *26264260a8ALSA: hda/realtek: fix mute/micmute LEDs for a HP ProBook | *1026756321efi: fix userspace infinite retry read efivars after EFI runtime services page fault | *45627a1a64nilfs2: fix general protection fault in nilfs_btree_insert() | *350d66d9e7zonefs: Detect append writes at invalid locations | *5054d001ffAdd exception protection processing for vd in axi_chan_handle_err function | *a12fd43bd1wifi: mac80211: sdata can be NULL during AMPDU start | 
*f96a6c009ewifi: brcmfmac: fix regression for Broadcom PCIe wifi devices | *908d1742b6Bluetooth: hci_qca: Fix driver shutdown on closed serdev | *7530fbc05ffbdev: omapfb: avoid stack overflow warning | *e1df7f0b27perf/x86/rapl: Treat Tigerlake like Icelake | *2c129e8689f2fs: let's avoid panic if extent_tree is not created | *58bac74402x86/asm: Fix an assembler warning with current binutils | *fdb4a70bb7btrfs: always report error in run_one_delayed_ref() | *f641067ea2RDMA/srp: Move large values to a new enum for gcc13 | *793f8ac218r8169: move rtl_wol_enable_rx() and rtl_prepare_power_down() | *dc072762f9net/ethtool/ioctl: return -EOPNOTSUPP if we have no phy stats | *308d24d875vduse: Validate vq_num in vduse_validate_config() | *8e1eb926a0virtio_pci: modify ENOENT to EINVAL | *64a6f3689dtools/virtio: initialize spinlocks in vring_test.c | *95fc28a8e9selftests/bpf: check null propagation only neither reg is PTR_TO_BTF_ID | *d4a9d2944fpNFS/filelayout: Fix coalescing test for single DS | *6a3319af6bbtrfs: fix trace event name typo for FLUSH_DELAYED_REFS * |52cea9ba91Merge "Merge 5.15.89 into android13-5.15-lts" into android13-5.15-lts |\ \ | * |de550d72f1Merge 5.15.89 into android13-5.15-lts | |\| | | *3bcc86eb3eLinux 5.15.89 | | *37c18ef49epinctrl: amd: Add dynamic debugging for active GPIOs | | *a5841b81adRevert "usb: ulpi: defer ulpi_register on ulpi_read_id timeout" | | *7ec9a45fc4block: handle bio_split_to_limits() NULL return | | *ba86db02d4io_uring/io-wq: only free worker if it was allocated for creation | | *bb135bcc94io_uring/io-wq: free worker if task_work creation is canceled | | *63c2fa09b8scsi: mpt3sas: Remove scsi_dma_map() error messages | | *e2ea555642efi: fix NULL-deref in init error path | | *94b6cf84dbarm64: cmpxchg_double*: hazard against entire exchange variable | | *3891fa4982arm64: atomics: remove LL/SC trampolines | | *61e86339afarm64: atomics: format whitespace consistently | | *ed4629d1e9io_uring: lock overflowing for IOPOLL | | 
*fbf5015141KVM: x86: Do not return host topology information from KVM_GET_SUPPORTED_CPUID | | *ee16841134Documentation: KVM: add API issues section | | *b8f3b3cffbmm: Always release pages to the buddy allocator in memblock_free_late(). | | *d2dc110deaplatform/surface: aggregator: Add missing call to ssam_request_sync_free() | | *cfd5978411igc: Fix PPS delta between two synchronized end-points | | *0bf52601ceperf build: Properly guard libbpf includes | | *205f35eee7net/mlx5e: Don't support encap rules with gbp option | | *0526fc9330net/mlx5: Fix ptp max frequency adjustment range | | *9e2c38827cnet/sched: act_mpls: Fix warning during failed attribute validation | | *e3bb44beaftools/nolibc: fix the O_* fcntl/open macro definitions for riscv | | *1e6ec75bb3tools/nolibc: restore mips branch ordering in the _start block | | *bd0431a66ctools/nolibc: Remove .global _start from the entry point code | | *a77c54f5b5tools/nolibc/arch: mark the _start symbol as weak | | *da51e086d1tools/nolibc/arch: split arch-specific code into individual files | | *8591e788betools/nolibc/types: split syscall-specific definitions into their own files | | *4fceecdeaatools/nolibc/std: move the standard type definitions to std.h | | *1792136f22tools/nolibc: use pselect6 on RISCV | | *487386a49etools/nolibc: x86-64: Use `mov $60,%eax` instead of `mov $60,%rax` | | *27af4f2260tools/nolibc: x86: Remove `r8`, `r9` and `r10` from the clobber list | | *a60b24192baf_unix: selftest: Fix the size of the parameter to connect() | | *39ae73e581nfc: pn533: Wait for out_urb's completion in pn533_usb_send_frame() | | *f6003784b1hvc/xen: lock console list traversal | | *79c58b7424octeontx2-af: Fix LMAC config in cgx_lmac_rx_tx_enable | | *303d062881tipc: fix unexpected link reset due to discovery messages | | *e79d0f97ccALSA: usb-audio: Relax hw constraints for implicit fb sync | | *c9557906bdALSA: usb-audio: Make sure to stop endpoints before closing EPs | | *83e758105bASoC: wm8904: fix wrong outputs volume 
after power reactivation | | *7c26d21872scsi: ufs: core: WLUN suspend SSU/enter hibern8 fail recovery | | *513fdf0b8escsi: ufs: Stop using the clock scaling lock in the error handler | | *13259b60b7scsi: mpi3mr: Refer CONFIG_SCSI_MPI3MR in Makefile | | *470f6a9175regulator: da9211: Use irq handler when ready | | *24107ad469x86/resctrl: Fix task CLOSID/RMID update race | | *cd3da505fbEDAC/device: Fix period calculation in edac_device_reset_delay_period() | | *ab0d02c53ax86/boot: Avoid using Intel mnemonics in AT&T syntax asm | | *a90d339f1fpowerpc/imc-pmu: Fix use of mutex in IRQs disabled section | | *511cf17b24netfilter: ipset: Fix overflow before widen in the bitmap_ip_create() function. | | *b22faa21b6sched/core: Fix use-after-free bug in dup_user_cpus_ptr() | | *d766ccadbeiommu/mediatek-v1: Fix an error handling path in mtk_iommu_v1_probe() | | *c929a230c8iommu/iova: Fix alloc iova overflows issue | | *4b51aa263ausb: ulpi: defer ulpi_register on ulpi_read_id timeout | | *9a8bf443f6bus: mhi: host: Fix race between channel preparation and M0 event | | *456e3794e0ipv6: raw: Deduct extension header length in rawv6_push_pending_frames | | *4c93422a54ixgbe: fix pci device refcount leak | | *e97da5d97aplatform/x86: sony-laptop: Don't turn off 0x153 keyboard backlight during probe | | *f3b1e04dafdt-bindings: msm/dsi: Don't require vcca-supply on 14nm PHY | | *52a5f596c6dt-bindings: msm/dsi: Don't require vdds-supply on 10nm PHY | | *984ad875dbdrm/msm/dp: do not complete dp_aux_cmd_fifo_tx() if irq is not for aux transfer | | *92ae83665eplatform/x86: ideapad-laptop: Add Legion 5 15ARH05 DMI id to set_fn_lock_led_list[] | | *e38b5f81dfdt-bindings: msm: dsi-phy-28nm: Add missing qcom, dsi-phy-regulator-ldo-mode | | *bb32ab40cbdt-bindings: msm: dsi-controller-main: Fix description of core clock | | *3fb8d10beedt-bindings: msm: dsi-controller-main: Fix power-domain constraint | | *dc5b651caddrm/msm/adreno: Make adreno quirks not overwrite each other | | 
*757d665ee1dt-bindings: msm: dsi-controller-main: Fix operating-points-v2 constraint | | *c90cf47d30platform/x86: dell-privacy: Fix SW_CAMERA_LENS_COVER reporting | | *25b5f693bcplatform/surface: aggregator: Ignore command messages not intended for us | | *ee7b8ce2ccplatform/x86: dell-privacy: Only register SW_CAMERA_LENS_COVER if present | | *e0072068adcifs: Fix uninitialized memory read for smb311 posix symlink create | | *f3495b5e9enet/mlx5e: Set action fwd flag when parsing tc action goto | | *1a8431cc20drm/i915/gt: Reset twice | | *011ecdbcd5drm/virtio: Fix GEM handle creation UAF | | *798dfeeae3s390/percpu: add READ_ONCE() to arch_this_cpu_to_op_simple() | | *a400593eb3s390/cpum_sf: add READ_ONCE() semantics to compare and swap loops | | *d4fa65960aASoC: qcom: lpass-cpu: Fix fallback SD line index handling | | *8400b91c11s390/kexec: fix ipl report address for kdump | | *c07e0babd1perf auxtrace: Fix address filter duplicate symbol selection | | *e81d82da61net: stmmac: add aux timestamps fifo clearance wait | | *44167b74a8docs: Fix the docs build with Sphinx 6.0 | | *24176bf2a1efi: tpm: Avoid READ_ONCE() for accessing the event log | | *01b966b14cselftests: kvm: Fix a compile error in selftests/kvm/rseq_test.c | | *c773ebe11cKVM: arm64: nvhe: Fix build with profile optimization | | *c1d6a72fc8KVM: arm64: Fix S1PTW handling on RO memslots | | *e04e6cd883ALSA: hda/realtek: Enable mute/micmute LEDs on HP Spectre x360 13-aw0xxx | | *b983c9a971ALSA: hda/realtek - Turn on power early | | *9ab3696881ALSA: control-led: use strscpy in set_led_id() | | *a8acfe2c6fnetfilter: nft_payload: incorrect arithmetics when fetching VLAN header bits * | |2c4f6d72f1Merge "Merge 5.15.88 into android13-5.15-lts" into android13-5.15-lts |\| | | * |773ec50a8aMerge 5.15.88 into android13-5.15-lts | |\| | | *90bb4f8f39Linux 5.15.88 | | *cbd3e6d5e5ALSA: hda - Enable headset mic on another Dell laptop with ALC3254 | | *b98dee4746ALSA: hda/hdmi: Add a HP device 0x8715 to force connect list | 
| *26350c21bcALSA: pcm: Move rwsem lock inside snd_ctl_elem_read to prevent UAF | | *dadd0dcaa6net/ulp: prevent ULP without clone op from entering the LISTEN status | | *04941c1d5bnet: sched: disallow noqueue for qdisc classes | | *068b512193serial: fixup backport of "serial: Deassert Transmit Enable on probe in driver-specific way" | | *46aa155758selftests/vm/pkeys: Add a regression test for setting PKRU through ptrace | | *3c1940c549x86/fpu: Emulate XRSTOR's behavior if the xfeatures PKRU bit is not set | | *3f1c81426ax86/fpu: Allow PKRU to be (once again) written by ptrace. | | *b29773d6b0x86/fpu: Add a pkru argument to copy_uabi_to_xstate() | | *9813c5fc22x86/fpu: Add a pkru argument to copy_uabi_from_kernel_to_xstate(). | | *fea26e83a1x86/fpu: Take task_struct* in copy_sigframe_from_user_to_xstate() | | *d4d152017eparisc: Align parisc MADV_XXX constants with all other architectures | * |1867565896Revert "ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire" | * |43064ed394Revert "ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio" | * |959d50edd2Revert "PM/devfreq: governor: Add a private governor_data for governor" * | |c34c76a947Revert "ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire" * | |33ef84070bRevert "ASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio" * | |e60641bdcaRevert "PM/devfreq: governor: Add a private governor_data for governor" * | |793ec0a9ccMerge "Merge 5.15.87 into android13-5.15-lts" into android13-5.15-lts |\| | | * |fc4de343bdMerge 5.15.87 into android13-5.15-lts | |\| | | *d57287729eLinux 5.15.87 | | *24186c6822drm/mgag200: Fix PLL setup for G200_SE_A rev >=4 | | *e326ee018aio_uring: Fix unsigned 'res' comparison with zero in io_fixup_rw_res() | | *b2b6eefab4efi: random: combine bootloader provided RNG seed with RNG protocol output | | *99c0759495mbcache: Avoid nesting of cache->c_list_lock under bit locks | | *d50d6c193anet: hns3: fix return value check bug of rx 
copybreak | | *d4e6a13eb9btrfs: make thaw time super block check to also verify checksum | | *70a1dccd0eselftests: set the BUILD variable to absolute path | | *58fef3ebc8ext4: don't allow journal inode to have encrypt flag | | *bd5dc96feamptcp: use proper req destructor for IPv6 | | *78bd6ab52cmptcp: dedicated request sock for subflow in v6 | | *6e9c1aef3eRevert "ACPI: PM: Add support for upcoming AMD uPEP HID AMDI007" | | *e32f867b37ksmbd: check nt_len to be at least CIFS_ENCPWD_SIZE in ksmbd_decode_ntlmssp_auth_blob | | *4136f1ac1eksmbd: fix infinite loop in ksmbd_conn_handler_loop() | | *f10defb0behfs/hfsplus: avoid WARN_ON() for sanity check, use proper error handling | | *48d9e2e6dehfs/hfsplus: use WARN_ON for sanity check | | *f5a9bbf962drm/i915/gvt: fix vgpu debugfs clean in remove | | *ae9a615117drm/i915/gvt: fix gvt debugfs destroy | | *eb3e943a32riscv, kprobes: Stricter c.jr/c.jalr decoding | | *620a229f57riscv: uaccess: fix type of 0 variable on error in get_user() | | *8e05a993f8thermal: int340x: Add missing attribute for data rate base | | *c3222fd282io_uring: fix CQ waiting timeout handling | | *b7b9bc9305block: don't allow splitting of a REQ_NOWAIT bio | | *e1358c8787fbdev: matroxfb: G200eW: Increase max memory from 1 MB to 16 MB | | *682a7d064fnfsd: fix handling of readdir in v4root vs. 
mount upcall timeout | | *cb42aa7b5fx86/bugs: Flush IBP in ib_prctl_set() | | *554a880a1fx86/kexec: Fix double-free of elf header buffer | | *264241a610btrfs: check superblock to ensure the fs was not modified at thaw time | | *69f4bda5f4nvme: also return I/O command effects from nvme_command_effects | | *a6a4b057cdnvmet: use NVME_CMD_EFFECTS_CSUPP instead of open coding it | | *f9309dcaa9io_uring: check for valid register opcode earlier | | *4df413d469nvme: fix multipath crash caused by flush request when blktrace is enabled | | *03ce792128ASoC: Intel: bytcr_rt5640: Add quirk for the Advantech MICA-071 tablet | | *0dca7375e2udf: Fix extension of the last extent in the file | | *dc1bc90397caif: fix memory leak in cfctrl_linkup_request() | | *bce3680b48drm/i915: unpin on error in intel_vgpu_shadow_mm_pin() | | *da6a3653b8perf stat: Fix handling of --for-each-cgroup with --bpf-counters to match non BPF mode | | *11cd4ec635usb: rndis_host: Secure rndis_query check against int overflow | | *6ea5273c71octeontx2-pf: Fix lmtst ID used in aura free | | *4e5f2c74cbdrivers/net/bonding/bond_3ad: return when there's no aggregator | | *8414983c2efs/ntfs3: don't hold ni_lock when calling truncate_setsize() | | *a23e8376e6drm/imx: ipuv3-plane: Fix overlay plane width | | *a8f7fd322fperf tools: Fix resources leak in perf_data__open_dir() | | *a1e1521b46netfilter: ipset: Rework long task execution when adding/deleting entries | | *6f19a38483netfilter: ipset: fix hash:net,port,net hang with /0 subnet | | *774d259749net: sparx5: Fix reading of the MAC address | | *04dc4003e5net: sched: cbq: dont intepret cls results when asked to drop | | *f02327a487net: sched: atm: dont intepret cls results when asked to drop | | *95da1882cegpio: sifive: Fix refcount leak in sifive_gpio_probe | | *da9c9883ecceph: switch to vfs_inode_has_locks() to fix file lock bug | | *54e72ce5f1filelock: new helper: vfs_inode_has_locks | | *f34b03ce3adrm/meson: Reduce the FIFO lines held when AFBC is not used | | 
*05a8410b0fRDMA/mlx5: Fix validation of max_rd_atomic caps for DC | | *8d89870d63RDMA/mlx5: Fix mlx5_ib_get_hw_stats when used for device | | *4d112f0016net: phy: xgmiitorgmii: Fix refcount leak in xgmiitorgmii_probe | | *e5fbeb3d16net: ena: Update NUMA TPH hint register upon NUMA node update | | *7840b93cfdnet: ena: Set default value for RX interrupt moderation | | *d09b7a9d2fnet: ena: Fix rx_copybreak value update | | *0e7ad9b006net: ena: Use bitmask to indicate packet redirection | | *5d4964984bnet: ena: Account for the number of processed bytes in XDP | | *f17d9aec07net: ena: Don't register memory info on XDP exchange | | *a4aa727ad0net: ena: Fix toeplitz initial hash value | | *0bec17f1cenet: amd-xgbe: add missed tasklet_kill | | *cb2f74685fnet/mlx5e: Fix hw mtu initializing at XDP SQ allocation | | *6c72abb78bnet/mlx5e: Always clear dest encap in neigh-update-del | | *b36783bc11net/mlx5e: TC, Refactor mlx5e_tc_add_flow_mod_hdr() to get flow attr | | *f8c10eeba3net/mlx5e: IPoIB, Don't allow CQE compression to be turned on by default | | *7227bbb7c1net/mlx5: Avoid recovery in probe flows | | *9369b9afa8net/mlx5: Add forgotten cleanup calls into mlx5_init_once() error path | | *d966f2ee4bnet/mlx5: E-Switch, properly handle ingress tagged packets on VST | | *6a37a01abavdpa_sim: fix vringh initialization in vdpasim_queue_ready() | | *e3462410c3vhost: fix range used in translate_desc() | | *13871f60ecvringh: fix range used in iotlb_translate() | | *e05d4c8c28vhost/vsock: Fix error handling in vhost_vsock_init() | | *586e6fd7d5vdpa_sim: fix possible memory leak in vdpasim_net_init() and vdpasim_blk_init() | | *b63bc2db24nfc: Fix potential resource leaks | | *945e58bdafnet: dsa: mv88e6xxx: depend on PTP conditionally | | *95df720e64qlcnic: prevent ->dcb use-after-free on qlcnic_dcb_enable() failure | | *6c55953e23net: sched: fix memory leak in tcindex_set_parms | | *d14a4b24d5net: hns3: fix VF promisc mode not update when mac table full | | *7ed205b947net: hns3: fix 
miss L3E checking for rx packet | | *47868cb77fnet: hns3: extract macro to simplify ring stats update code | | *7457c5a776net: hns3: refactor hns3_nic_reuse_page() | | *4a6e9fb534net: hns3: add interrupts re-initialization while doing VF FLR | | *5e48ed805cnfsd: shut down the NFSv4 state objects before the filecache | | *7e2825f5fbveth: Fix race with AF_XDP exposing old or uninitialized descriptors | | *ac95cdafacnetfilter: nf_tables: honor set timeout and garbage collection updates | | *49677ea151vmxnet3: correctly report csum_level for encapsulated packet | | *9d30cb4421netfilter: nf_tables: perform type checking for existing sets | | *c3bfb7784anetfilter: nf_tables: add function to create set stateful expressions | | *996cd779c2netfilter: nf_tables: consolidate set description | | *4f1105ee72drm/panfrost: Fix GEM handle creation ref-counting | | *df493f676fbpf: pull before calling skb_postpull_rcsum() | | *d7e817e689btrfs: fix an error handling path in btrfs_defrag_leaves() | | *4d69cdba2cSUNRPC: ensure the matching upcall is in-flight upon downcall | | *af0265dfefdrm/i915/migrate: fix length calculation | | *8b25a526a5drm/i915/migrate: fix offset calculation | | *a3d1e6f9b6drm/i915/migrate: don't check the scratch page | | *5bc0b2fda4ext4: fix deadlock due to mbcache entry corruption | | *a6e4094fafmbcache: automatically delete entries from cache on freeing | | *1872549129ext4: correct inconsistent error msg in nojournal mode | | *761f88f82eext4: goto right label 'failed_mount3a' | | *eb16602140ravb: Fix "failed to switch device to config mode" message during unbind | | *4216995dbdperf probe: Fix to get the DW_AT_decl_file and DW_AT_call_file as unsinged data | | *d8bbbf2b52perf probe: Use dwarf_attr_integrate as generic DWARF attr accessor | | *b131b5f136media: s5p-mfc: Fix in register read and write for H264 | | *ff27800c0amedia: s5p-mfc: Clear workbit to handle error condition | | *4653ba32admedia: s5p-mfc: Fix to handle reference queue during finishing | | 
*1bd7283dc0x86/MCE/AMD: Clear DFR errors found in THR handler | | *5ddcd349d9x86/mce: Get rid of msr_ops | | *b8e7ed42bcbtrfs: fix extent map use-after-free when handling missing device in read_one_chunk | | *9c3beebd21btrfs: move missing device handling in a dedicate function | | *7528b21cebbtrfs: replace strncpy() with strscpy() | | *4cef44525fphy: qcom-qmp-combo: fix out-of-bounds clock access | | *855edc4ec6ARM: renumber bits related to _TIF_WORK_MASK | | *18f28f1330ext4: fix off-by-one errors in fast-commit block filling | | *b205332b6bext4: fix unaligned memory access in ext4_fc_reserve_space() | | *9c197dcbacext4: add missing validation of fast-commit record lengths | | *6220ec4055ext4: don't set up encryption key during jbd2 transaction | | *6482d42bafext4: disable fast-commit of encrypted dir operations | | *6969367c15ext4: fix potential out of bound read in ext4_fc_replay_scan() | | *818175ae3bext4: factor out ext4_fc_get_tl() | | *ffd84d0bc5ext4: introduce EXT4_FC_TAG_BASE_LEN helper | | *37914e029bext4: use ext4_debug() instead of jbd_debug() | | *b0ed9a032eext4: remove unused enum EXT4_FC_COMMIT_FAILED | | *394514ddf9tracing: Fix issue of missing one synthetic field | | *5234dd5d20block: mq-deadline: Fix dd_finish_request() for zoned devices | | *78623b10fcdrm/amdgpu: make display pinning more flexible (v2) | | *6363da2c85drm/amdgpu: handle polaris10/11 overlap asics (v2) | | *2771c7a0eeext4: allocate extended attribute value in vmalloc area | | *e995ff918eext4: avoid unaccounted block allocation when expanding inode | | *877247222aext4: initialize quota before expanding inode in setproject ioctl | | *322cf639b0ext4: fix inode leak in ext4_xattr_inode_create() on an error path | | *6380a93b57ext4: fix kernel BUG in 'ext4_write_inline_data_end()' | | *dc3bbc9753ext4: avoid BUG_ON when creating xattrs | | *844c405552ext4: fix error code return to user-space in ext4_get_branch() | | *b870b28e29ext4: fix corruption when online resizing a 1K bigalloc fs | | 
*d440d6427aext4: fix delayed allocation bug in ext4_clu_mapped for bigalloc + inline | | *def7a39091ext4: init quota for 'old.inode' in 'ext4_rename' | | *3c31d8d3adext4: fix uninititialized value in 'ext4_evict_inode' | | *871800770dext4: fix leaking uninitialized memory in fast-commit journal | | *d480a49c15ext4: fix bug_on in __es_tree_search caused by bad boot loader inode | | *91009e361eext4: check and assert if marking an no_delete evicting inode dirty | | *820eacbc4eext4: fix reserved cluster accounting in __es_remove_extent() | | *0dcbf4dc3dext4: fix bug_on in __es_tree_search caused by bad quota inode | | *06a20a68bbext4: add helper to check quota inums | | *f7e6b5548fext4: add EXT4_IGET_BAD flag to prevent unexpected bad inode | | *205ac16628ext4: fix undefined behavior in bit shift for ext4_check_flag_values | | *cf0e0817b0ext4: fix use-after-free in ext4_orphan_cleanup | | *970bfd7a41fs: ext4: initialize fsdata in pagecache_write() | | *744bbde378ext4: remove trailing newline from ext4_msg() message | | *7192afa5e4ext4: add inode table check in __ext4_get_inode_loc to aovid possible infinite loop | | *0d041b7251ext4: silence the warning when evicting inode with dioread_nolock | | *af4ceb00ebdrm/ingenic: Fix missing platform_driver_unregister() call in ingenic_drm_init() | | *c919e1154bdrm/i915/dsi: fix VBT send packet port selection for dual link DSI | | *6948e570f5drm/vmwgfx: Validate the box size for the snooped cursor | | *5594fde1efdrm/connector: send hotplug uevent on connector cleanup | | *317ebe61a6device_cgroup: Roll back to original exceptions after copy failure | | *ac838c663bparisc: led: Fix potential null-ptr-deref in start_task() | | *2c1881f081remoteproc: core: Do pm_relax when in RPROC_OFFLINE state | | *9b615f957ciommu/amd: Fix ivrs_acpihid cmdline parsing code | | *35b792179bphy: qcom-qmp-combo: fix sc8180x reset | | *dfd05a1335driver core: Fix bus_type.match() error handling in __driver_attach() | | *44618a3397crypto: ccp - Add support 
for TEE for PCI ID 0x14CA | | *c55507a94bcrypto: n2 - add missing hash statesize | | *4830750696riscv: mm: notify remote harts about mmu cache updates | | *16b6d9525driscv: stacktrace: Fixup ftrace_graph_ret_addr retp argument | | *657b440a27PCI/sysfs: Fix double free in error path | | *67fd41bbb0PCI: Fix pci_device_is_present() for VFs by checking PF | | *bfce073089ipmi: fix use after free in _ipmi_destroy_user() | | *3b4984035cima: Fix a potential NULL pointer access in ima_restore_measurement_list | | *a843699f16mtd: spi-nor: Check for zero erase size in spi_nor_find_best_erase_type() | | *24f4649cd8ipmi: fix long wait in unload when IPMI disconnect | | *fa6bbb4894ipu3-imgu: Fix NULL pointer dereference in imgu_subdev_set_selection() | | *cdb208b090ASoC: jz4740-i2s: Handle independent FIFO flush bits | | *2d0d083d8awifi: wilc1000: sdio: fix module autoloading | | *2e4a088804efi: Add iMac Pro 2017 to uefi skip cert quirk | | *c49fb9b760md/bitmap: Fix bitmap chunk size overflow issues | | *94fe975d54block: mq-deadline: Do not break sequential write streams to zoned HDDs | | *8e91679f7brtc: ds1347: fix value written to century register | | *5eb8296d73cifs: fix missing display of three mount options | | *cfa9f66f91cifs: fix confusing debug message | | *8b45a3b19amedia: dvb-core: Fix UAF due to refcount races at releasing | | *acf984a371media: dvb-core: Fix double free in dvb_register_device() | | *5fac317beeARM: 9256/1: NWFPE: avoid compiler-generated __aeabi_uldivmod | | *ce50c61245staging: media: tegra-video: fix device_node use after free | | *6b16758215staging: media: tegra-video: fix chan->mipi value on error | | *4f5de49d8ctracing: Fix infinite loop in tracing_read_pipe on overflowed print_trace_line | | *17becbc4ddtracing/probes: Handle system names with hyphens | | *2442e655a6tracing/hist: Fix wrong return value in parse_action_params() | | *2a81ff5ce8tracing: Fix complicated dependency of CONFIG_TRACER_MAX_TRACE | | *fe8c35c6fftracing: Fix race where 
eprobes can be called before the event | | *eb20f6ed37x86/kprobes: Fix optprobe optimization check with CONFIG_RETHUNK | | *3e0fbc06dbx86/kprobes: Fix kprobes instruction boudary check with CONFIG_RETHUNK | | *6268a0704bftrace/x86: Add back ftrace_expected for ftrace bug reports | | *c95cf30dd4x86/microcode/intel: Do not retry microcode reloading on the APs | | *f8fe2f4178KVM: nVMX: Properly expose ENABLE_USR_WAIT_PAUSE control to L1 | | *ca3483d71bKVM: nVMX: Inject #GP, not #UD, if "generic" VMXON CR0/CR4 check fails | | *2c73b349fdKVM: VMX: Resume guest immediately when injecting #GP on ECREATE | | *4a19f48beeof/kexec: Fix reading 32-bit "linux,initrd-{start,end}" values | | *7eddcdb09fperf/core: Call LSM hook after copying perf_event_attr | | *15697f6533tracing/hist: Fix out-of-bound write on 'action_data.var_ref_idx' | | *fd52b86a72dm cache: set needs_check flag after aborting metadata | | *d2a0b298ebdm cache: Fix UAF in destroy() | | *856edd0e92dm clone: Fix UAF in clone_dtr() | | *9215b25f2edm integrity: Fix UAF in dm_integrity_dtr() | | *34cd15d83bdm thin: Fix UAF in run_timer_softirq() | | *ac362c40e3dm thin: resume even if in FAIL mode | | *4b710e8481dm thin: Use last transaction's pmd->root when commit failed | | *f8c26c33fedm thin: Fix ABBA deadlock between shrink_slab and dm_pool_abort_metadata | | *28d307f380dm cache: Fix ABBA deadlock between shrink_slab and dm_cache_metadata_abort | | *a9e89a567fmptcp: remove MPTCP 'ifdef' in TCP SYN cookies | | *13b9fd0deemptcp: mark ops structures as ro_after_init | | *b2120ed7fdfs: dlm: retry accept() until -EAGAIN or error returns | | *5b4478615ffs: dlm: fix sock release if listen fails | | *b7ede8a63dALSA: hda/realtek: Apply dual codec fixup for Dell Latitude laptops | | *dbd1f30191ALSA: patch_realtek: Fix Dell Inspiron Plus 16 | | *8fb4c98f20cpufreq: Init completion before kobject_init_and_add() | | *876c6ab967PM/devfreq: governor: Add a private governor_data for governor | | *0e945ea733selftests: Use optional 
USERCFLAGS and USERLDFLAGS | | *31697c5953arm64: dts: qcom: sdm850-lenovo-yoga-c630: correct I2C12 pins drive strength | | *1630498660ARM: ux500: do not directly dereference __iomem | | *99590f29b2btrfs: fix resolving backrefs for inline extent followed by prealloc | | *1f9cf4daf2mmc: sdhci-sprd: Disable CLK_AUTO when the clock is less than 400K | | *58d53ff30aarm64: dts: qcom: sdm845-db845c: correct SPI2 pins drive strength | | *a777b90a05perf/x86/intel/uncore: Clear attr_update properly | | *ca77ac238cperf/x86/intel/uncore: Disable I/O stacks to PMU mapping on ICX-D | | *df06e7777cjbd2: use the correct print format | | *8e75b1dd4bktest.pl minconfig: Unset configs instead of just removing them | | *55e5e8b445kest.pl: Fix grub2 menu handling for rebooting | | *823fed7c40soc: qcom: Select REMAP_MMIO for LLCC driver | | *8dabeeb1ffmedia: stv0288: use explicitly signed char | | *d167ebea90net/af_packet: make sure to pull mac header | | *9ff46c36dfnet/af_packet: add VLAN support for AF_PACKET SOCK_RAW GSO | | *cd0f597c8arcu-tasks: Simplify trc_read_check_handler() atomic operations | | *593ca69668ASoC/SoundWire: dai: expand 'stream' concept beyond SoundWire | | *a7874dac6bASoC: Intel/SOF: use set_stream() instead of set_tdm_slots() for HDAudio | | *ae4f70b2fekcsan: Instrument memcpy/memset/memmove with newer Clang | | *d01fa993ebSUNRPC: Don't leak netobj memory when gss_read_proxy_verf() fails | | *43135fb098tpm: tpm_tis: Add the missed acpi_put_table() to fix memory leak | | *986cd9a9b9tpm: tpm_crb: Add the missed acpi_put_table() to fix memory leak | | *638cd298dftpm: acpi: Call acpi_put_table() to fix memory leak | | *d58289fc77mmc: vub300: fix warning - do not call blocking ops when !TASK_RUNNING | | *7eb57bc92ff2fs: allow to read node block after shutdown | | *acc13987fdf2fs: should put a page when checking the summary info | | *35d8a89862mm, compaction: fix fast_isolate_around() to stay within boundaries | | *91bd504128md: fix a crash in mempool_free | | 
*29328fbce5mfd: mt6360: Add bounds checking in Regmap read/write call-backs | | *c24cc476acpnode: terminate at peers of source | | *0c9118e381ALSA: line6: fix stack overflow in line6_midi_transmit | | *ac4b4fdf32ALSA: line6: correct midi status byte when receiving data from podxt | | *83c44f0ebfovl: Use ovl mounter's fsuid and fsgid in ovl_link() | | *fcb94283e0binfmt: Fix error return code in load_elf_fdpic_binary() | | *ed9947277bhfsplus: fix bug causing custom uid and gid being unable to be assigned with mount | | *76d52b5412pstore/zone: Use GFP_ATOMIC to allocate zone buffer | | *74b0a2fcc3pstore: Properly assign mem_type property | | *d25aac3489HID: plantronics: Additional PIDs for double volume key presses quirk | | *9d4294545cHID: multitouch: fix Asus ExpertBook P2 P2451FA trackpoint | | *7280fdb80bpowerpc/rtas: avoid scheduling in rtas_os_term() | | *d8939315b7powerpc/rtas: avoid device tree lookups in rtas_os_term() | | *23a249b118objtool: Fix SEGFAULT | | *ed686e7a26fs/ntfs3: Fix slab-out-of-bounds in r_page | | *dd34665cb0fs/ntfs3: Delete duplicate condition in ntfs_read_mft() | | *a9847a11b6fs/ntfs3: Use __GFP_NOWARN allocation at ntfs_fill_super() | | *abd2ee2cf4fs/ntfs3: Use __GFP_NOWARN allocation at wnd_init() | | *d7ce7bb688fs/ntfs3: Validate index root when initialize NTFS security | | *f29676cc3asoundwire: dmi-quirks: add quirk variant for LAPBC710 NUC15 | | *9c8471a17ffs/ntfs3: Fix slab-out-of-bounds read in run_unpack | | *3a52f17867fs/ntfs3: Validate resident attribute name | | *3cd9e5b41bfs/ntfs3: Validate buffer length while parsing index | | *c878a915bcfs/ntfs3: Validate attribute name offset | | *f62506f5e4fs/ntfs3: Add null pointer check for inode operations | | *2dd9ccfb06fs/ntfs3: Fix memory leak on ntfs_fill_super() error path | | *ea6b359840fs/ntfs3: Add null pointer check to attr_load_runs_vcn | | *de5e095524fs/ntfs3: Validate data run offset | | *d4489ba8fbfs/ntfs3: Add overflow check for attribute size | | *af7a195deafs/ntfs3: 
Validate BOOT record_size | | *8e228ac90cnvmet: don't defer passthrough commands with trivial effects to the workqueue | | *f068a7315anvme: fix the NVME_CMD_EFFECTS_CSE_MASK definition | | *576502f25fata: ahci: Fix PCS quirk application for suspend | | *7949b0df3dblock, bfq: fix uaf for bfqq in bfq_exit_icq_bfqq | | *ff3d9ab51cACPI: resource: do IRQ override on Lenovo 14ALC7 | | *698a0813ceACPI: resource: do IRQ override on XMG Core 15 | | *a9ac7633bbACPI: resource: do IRQ override on LENOVO IdeaPad | | *5fe31f2950ACPI: resource: Skip IRQ override on Asus Vivobook K3402ZA/K3502ZA | | *4c5fee0d88nvme-pci: fix page size checks | | *9141144b37nvme-pci: fix mempool alloc size | | *f17cf8fa2cnvme-pci: fix doorbell buffer value endianness | | *ead99ec669Revert "selftests/bpf: Add test for unstable CT lookup API" | | *bf0543b937cifs: fix oops during encryption | | *56f6de394fusb: dwc3: qcom: Fix memory leak in dwc3_qcom_interconnect_init * | |2ce8e6e296ANDROID: add __dev_kfree_skb_irq to virtual_device abi list |/ / * |24bc28221fRevert "net: add atomic_long_t to net_device_stats fields" * |34d878c5b3Revert "ipv6/sit: use DEV_STATS_INC() to avoid data-races" * |956e2924f3Revert "arm64: Treat ESR_ELx as a 64-bit register" * |8a3baaa85eRevert "arm64: mm: kfence: only handle translation faults" * |8b3730f922Revert "gpiolib: protect the GPIO device against being dropped while in use by user-space" * |b0e87c106dRevert "soreuseport: Fix socket selection for SO_INCOMING_CPU." 
* |8a8a0cb6c6Revert "bpf, sockmap: Fix missing BPF_F_INGRESS flag when using apply_bytes" * |2d4c48bff9Revert "xhci: Prevent infinite loop in transaction errors recovery for streams" * |20ec745823Merge 5.15.86 into android13-5.15-lts |\| | *90ffbb727cLinux 5.15.86 | *3082f8705epwm: tegra: Fix 32 bit build | *caa40d1f85mfd: qcom_rpm: Use devm_of_platform_populate() to simplify code | *408dbaa065extcon: usbc-tusb320: Call the Type-C IRQ handler only if a port is registered | *2471a44769media: dvbdev: fix refcnt bug | *579fb0a332media: dvbdev: fix build warning due to comments | *1115e77c4fnet: stmmac: fix errno when create_singlethread_workqueue() fails | *d3871af13ascsi: qla2xxx: Fix crash when I/O abort times out | *50f993da94btrfs: do not BUG_ON() on ENOMEM when dropping extent items for a range | *1c65d50315ovl: fix use inode directly in rcu-walk mode | *88ec6d1105fbdev: fbcon: release buffer when fbcon_do_set_font() failed | *ca8bcb348agcov: add support for checksum field | *f36d8c8651floppy: Fix memory leak in do_floppy_init() | *4193a6745bregulator: core: fix deadlock on regulator enable | *ce5d0ef1cfiio: adc128s052: add proper .data members in adc128_of_match table | *aec1058f2aiio: adc: ad_sigma_delta: do not use internal iio_dev lock | *dc6afd6070iio: fix memory leak in iio_device_register_eventset() | *38c257ee6areiserfs: Add missing calls to reiserfs_security_free() | *8a4236456asecurity: Restrict CONFIG_ZERO_CALL_USED_REGS to gcc or clang > 15.0.6 | *1cabce56629p: set req refcount to zero to avoid uninitialized usage | *dd2157a98floop: Fix the max_loop commandline argument treatment when it is set to 0 | *fd03bd4c7bHID: mcp2221: don't connect hidraw | *6c886be1ffHID: wacom: Ensure bootloader PID is usable in hidraw mode | *4d640eb112xhci: Prevent infinite loop in transaction errors recovery for streams | *936c5f96c8usb: dwc3: core: defer probe on ulpi_read_id timeout | *e6bf6c4022usb: dwc3: Fix race between dwc3_set_mode and __dwc3_set_mode | 
*0e883f3bc8arm64: dts: qcom: sm8250: fix USB-DP PHY registers | *ffb14aac26usb: xhci-mtk: fix leakage of shared hcd when fail to set wakeup irq | *fcacd970e0usb: cdnsp: fix lack of ZLP for ep0 | *bcac79df08ALSA: hda/hdmi: Add HP Device 0x8711 to force connect list | *50c23a1107ALSA: hda/realtek: Add quirk for Lenovo TianYi510Pro-14IOB | *76574b3465ALSA: usb-audio: add the quirk for KT0206 device | *9e787dab98ima: Simplify ima_lsm_copy_rule | *2cd365029cpstore: Make sure CONFIG_PSTORE_PMSG selects CONFIG_RT_MUTEXES | *2068d41a3dafs: Fix lost servers_outstanding count | *0def8af038perf debug: Set debug_peo_args and redirect_to_stderr variable to correct values in perf_quiet_option() | *41cccae10epstore: Switch pmsg_lock to an rt_mutex to avoid priority inversion | *8877df8135LoadPin: Ignore the "contents" argument of the LSM hooks | *584202b0f1drm/i915/display: Don't disable DDI/Transcoder when setting phy test pattern | *b253e075b1ASoC: rt5670: Remove unbalanced pm_runtime_put() | *59f797a913ASoC: rockchip: spdif: Add missing clk_disable_unprepare() in rk_spdif_runtime_resume() | *132844d92fASoC: wm8994: Fix potential deadlock | *82f7c814edASoC: mediatek: mt8183: fix refcount leak in mt8183_mt6358_ts3a227_max98357_dev_probe() | *e5d6bf3e5aASoC: rockchip: pdm: Add missing clk_disable_unprepare() in rockchip_pdm_runtime_resume() | *85eb5c952bASoC: audio-graph-card: fix refcount leak of cpu_ep in __graph_for_each_link() | *9ff07316caASoC: mediatek: mt8173-rt5650-rt5514: fix refcount leak in mt8173_rt5650_rt5514_dev_probe() | *7643909cf0ASoC: Intel: Skylake: Fix driver hang during shutdown | *33ff0f9f9cALSA: hda: add snd_hdac_stop_streams() helper | *78649a624dALSA/ASoC: hda: move/rename snd_hdac_ext_stop_streams to hdac_stream.c | *98b0f50fechwmon: (jc42) Fix missing unlock on error in jc42_write() | *5e69233508KVM: selftests: Fix build regression by using accessor function | *6215904fe2tools/include: Add _RET_IP_ and math definitions to kernel.h | *c885326728orangefs: 
Fix kmemleak in orangefs_{kernel,client}_debug_init() | *39529b79b0orangefs: Fix kmemleak in orangefs_prepare_debugfs_help_string() | *a075c21ee0drm/sti: Fix return type of sti_{dvo,hda,hdmi}_connector_mode_valid() | *f3c14b99f3drm/fsl-dcu: Fix return type of fsl_dcu_drm_connector_mode_valid() | *9a8862820chugetlbfs: fix null-ptr-deref in hugetlbfs_parse_param() | *4f6b206998scsi: elx: libefc: Fix second parameter type in state callbacks | *23f0e9f863scsi: ufs: Reduce the START STOP UNIT timeout | *2cf66428a2scsi: lpfc: Fix hard lockup when reading the rx_monitor from debugfs | *2b3e3ecdb4crypto: hisilicon/hpre - fix resource leak in remove process | *adf6a00859clk: st: Fix memory leak in st_of_quadfs_setup() | *6c8aee0c8fmedia: si470x: Fix use-after-free in si470x_int_in_callback() | *58b6496a74mmc: renesas_sdhi: better reset from HS400 mode | *c33c904124mmc: f-sdh30: Add quirks for broken timeout clock capability | *69346de0ebwifi: mt76: do not run mt76u_status_worker if the device is not running | *feb847e659regulator: core: fix use_count leakage when handling boot-on | *474e70bd90libbpf: Avoid enum forward-declarations in public API in C++ mode | *6209542869drm/amd/display: Use the largest vready_offset in pipe group | *eff45bfbc2blk-mq: fix possible memleak when register 'hctx' failed | *d0af6220bbmedia: dvb-usb: fix memory leak in dvb_usb_adapter_init() | *88a6f8a72dmedia: dvbdev: adopts refcnt to avoid UAF | *438cd29fecmedia: dvb-frontends: fix leak of memory fw | *a96841f5aaethtool: avoiding integer overflow in ethtool_phys_id() | *b327c68acebpf: Prevent decl_tag from being referenced in func_proto arg | *4b8f3b9392ppp: associate skb with a device at tx | *5d5a481a7fmrp: introduce active flags to prevent UAF when applicant uninit | *222cc04356ipv6/sit: use DEV_STATS_INC() to avoid data-races | *8a3b023710net: add atomic_long_t to net_device_stats fields | *58dd11f624drm/amd/display: fix array index out of bound error in bios parser | *a3cc41e05emd/raid1: 
stop mdx_raid1 thread when raid1 array run failed | *b621d17fe8drivers/md/md-bitmap: check the return value of md_bitmap_get_counter() | *5afac74f15drm/mediatek: Fix return type of mtk_hdmi_bridge_mode_valid() | *072508e99ddrm/sti: Use drm_mode_copy() | *673a3e0199drm/rockchip: Use drm_mode_copy() | *b9b07900d2drm/msm: Use drm_mode_copy() | *5ad774fb82s390/lcs: Fix return type of lcs_start_xmit() | *dfbf0122eas390/netiucv: Fix return type of netiucv_tx() | *8131d1880cs390/ctcm: Fix return type of ctc{mp,}m_tx() | *f9084e9930drm/amdgpu: Fix type of second parameter in odn_edit_dpm_table() callback | *b74580d618drm/amdgpu: Fix type of second parameter in trans_msg() callback | *314f7092b2igb: Do not free q_vector unless new one was allocated | *0b12d2aa26wifi: brcmfmac: Fix potential shift-out-of-bounds in brcmf_fw_alloc_request() | *19bb9e98e1hamradio: baycom_epp: Fix return type of baycom_send_packet() | *a413ebb604net: ethernet: ti: Fix return type of netcp_ndo_start_xmit() | *5d3f4478d2bpf: make sure skb->len != 0 when redirecting to a tunneling device | *be2803dd29qed (gcc13): use u16 for fid to be big enough | *a8bc0ac438Revert "drm/amd/display: Limit max DSC target bpp for specific monitors" | *cc8deb82ccdrm/amd/display: prevent memory leak | *49dd0e8029ipmi: fix memleak when unload ipmi driver | *68871c005fASoC: codecs: rt298: Add quirk for KBL-R RVP platform | *3eca9697c2wifi: ar5523: Fix use-after-free on ar5523_cmd() timed out | *c319196a0ewifi: ath9k: verify the expected usb_endpoints are present | *10c4b63d09brcmfmac: return error when getting invalid max_flowrings from dongle | *ad31bc146fmedia: imx-jpeg: Disable useless interrupt to avoid kernel panic | *6e1a6880e1drm/etnaviv: add missing quirks for GC300 | *367296925chfs: fix OOB Read in __hfs_brec_find | *ebe16676e1acct: fix potential integer overflow in encode_comp_t() | *8b6ef451b5nilfs2: fix shift-out-of-bounds due to too large exponent of block size | *b47f5c579cnilfs2: fix 
shift-out-of-bounds/overflow in nilfs_sb2_bad_offset() | *5777432ebaACPICA: Fix error code path in acpi_ds_call_control_method() | *10b87da8fafs: jfs: fix shift-out-of-bounds in dbDiscardAG | *5059ea84a8jfs: Fix fortify moan in symlink | *e7a6a53c87udf: Avoid double brelse() in udf_rename() | *0536f76a2bfs: jfs: fix shift-out-of-bounds in dbAllocAG | *88cea1676abinfmt_misc: fix shift-out-of-bounds in check_special_flags | *cadb938a5ex86/hyperv: Remove unregister syscore call from Hyper-V cleanup | *659747f6f6video: hyperv_fb: Avoid taking busy spinlock on panic path | *9d05c20b0aarm64: make is_ttbrX_addr() noinstr-safe | *98a5b1265arcu: Fix __this_cpu_read() lockdep warning in rcu_force_quiescent_state() | *d238f94b2bHID: amd_sfh: Add missing check for dma_alloc_coherent | *9da204cd67net: stream: purge sk_error_queue in sk_stream_kill_queues() | *f47426250fmyri10ge: Fix an error handling path in myri10ge_probe() | *1ec0a7d5b0rxrpc: Fix missing unlock in rxrpc_do_sendmsg() | *5478eb7adcnet_sched: reject TCF_EM_SIMPLE case for complex ematch module | *4f05d8e2fbmailbox: zynq-ipi: fix error handling while device_register() fails | *550f403e46mailbox: arm_mhuv2: Fix return value check in mhuv2_probe() | *28604a960cmailbox: mpfs: read the system controller's status | *8fb773eed4skbuff: Account for tail adjustment during pull operations | *dc0f38957aarm64: dts: mt8183: Fix Mali GPU clock | *790b396f6bsoc: mediatek: pm-domains: Fix the power glitch issue | *0133615a06openvswitch: Fix flow lookup to use unmasked key | *04e454bd97selftests: devlink: fix the fd redirect in dummy_reporter_test | *d52646a46crtc: mxc_v2: Add missing clk_disable_unprepare() | *ac95c4e35figc: Set Qbv start_time and end_time to end_time if not being configured in GCL | *af59985138igc: Lift TAPRIO schedule restriction | *4d50d640edigc: recalculate Qbv end_time by considering cycle time | *1ef9416957igc: allow BaseTime 0 enrollment for Qbv | *c0df8e7ba6igc: Add checking for basetime less than zero | 
*5b46b53f45igc: Use strict cycles for Qbv scheduling | *fd7d029436igc: Enhance Qbv scheduling by using first flag bit | *9b5b50329er6040: Fix kmemleak in probe and remove | *1b428ba31bunix: Fix race in SOCK_SEQPACKET's unix_dgram_sendmsg() | *aae9c24ebdnfc: pn533: Clear nfc_target before being used | *bcf2c1dc53net: enetc: avoid buffer leaks on xdp_do_redirect() failure | *f463a1295cselftests/bpf: Add test for unstable CT lookup API | *094f3d9314block, bfq: fix possible uaf for 'bfqq->bic' | *cf48cb8debmISDN: hfcmulti: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() | *5607353751mISDN: hfcpci: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() | *ada4022f48mISDN: hfcsusb: don't call dev_kfree_skb/kfree_skb() under spin_lock_irqsave() | *0578f9929fnet: macsec: fix net device access prior to holding a lock | *a472f069cenfsd: under NFSv4.1, fix double svc_xprt_put on rpc_create failure | *f8f1d037d6rtc: pcf85063: fix pcf85063_clkout_control | *35a174552brtc: pic32: Move devm_rtc_allocate_device earlier in pic32_rtc_probe() | *eea105c4e4rtc: st-lpc: Add missing clk_disable_unprepare in st_rtc_probe() | *74248b5560netfilter: flowtable: really fix NAT IPv6 offload | *5c940632camfd: pm8008: Fix return value check in pm8008_probe() | *ec10848e26mfd: pm8008: Remove driver data structure pm8008_data | *38959417d3mfd: qcom_rpm: Fix an error handling path in qcom_rpm_probe() | *b95ae3543emfd: bd957x: Fix Kconfig dependency on REGMAP_IRQ | *615d3c8a46powerpc/pseries/eeh: use correct API for error log size | *68de42e008powerpc/eeh: Drop redundant spinlock initialization | *2b157b4b13remoteproc: qcom: q6v5: Fix missing clk_disable_unprepare() in q6v5_wcss_qcs404_power_on() | *4b191533f5remoteproc: qcom_q6v5_pas: Fix missing of_node_put() in adsp_alloc_memory_region() | *d7628ebca8remoteproc: qcom_q6v5_pas: detach power domains on remove | *fdf47f462aremoteproc: qcom_q6v5_pas: disable wakeup on probe fail or remove | *098ebb9089remoteproc: qcom: q6v5: 
Fix potential null-ptr-deref in q6v5_wcss_init_mmio() | *131c0a3eadremoteproc: sysmon: fix memory leak in qcom_add_sysmon_subdev() | *4507c6a672pwm: mediatek: always use bus clock for PWM on MT7622 | *4fbbb14f0epwm: mtk-disp: Fix the parameters calculated by the enabled flag of disp_pwm | *eec59807a2pwm: sifive: Call pwm_sifive_update_clock() while mutex is held | *37ea9a6c41iommu/sun50i: Remove IOMMU_DOMAIN_IDENTITY | *8de2c29db6selftests/powerpc: Fix resource leaks | *dd49c5031epowerpc/hv-gpci: Fix hv_gpci event list | *65d3469f3bpowerpc/83xx/mpc832x_rdb: call platform_device_put() in error case in of_fsl_spi_probe() | *cf03db2896powerpc/perf: callchain validate kernel stack pointer bounds | *5de1902244powerpc/xive: add missing iounmap() in error path in xive_spapr_populate_irq_data() | *b31e9647f1powerpc/xmon: Fix -Wswitch-unreachable warning in bpt_cmds | *6a310e8db5cxl: Fix refcount leak in cxl_calc_capp_routing | *0accd460dcpowerpc/52xx: Fix a resource leak in an error handling path | *be2b9b1a60macintosh/macio-adb: check the return value of ioremap() | *19ded60b40macintosh: fix possible memory leak in macio_add_one_device() | *e42b543d08iommu/fsl_pamu: Fix resource leak in fsl_pamu_probe() | *6e501b3fd7iommu/amd: Fix pci device refcount leak in ppr_notifier() | *9383921e8brtc: pcf85063: Fix reading alarm | *b66aa7b306rtc: snvs: Allow a time difference on clock register read | *7a6cc22eabrtc: cmos: Disable ACPI RTC event on removal | *689f757f0artc: cmos: Rename ACPI-related functions | *1c74bbecdartc: cmos: Eliminate forward declarations of some functions | *3a439a2cabrtc: cmos: Call rtc_wake_setup() from cmos_do_probe() | *9febdff75crtc: cmos: Call cmos_wake_setup() from cmos_do_probe() | *d9324fb3eertc: cmos: fix build on non-ACPI platforms | *fe46b9303ertc: cmos: Fix wake alarm breakage | *60c6e563a8rtc: cmos: Fix event handler registration ordering issue | *d3aa083469rtc: rtc-cmos: Do not check ACPI_FADT_LOW_POWER_S0 | *6e98a93c75dmaengine: idxd: Fix 
crc_val field for completion record | *ab53749c32fs/ntfs3: Fix slab-out-of-bounds read in ntfs_trim_fs | *1ba0968b33pwm: tegra: Improve required rate calculation | *c160505c9binclude/uapi/linux/swab: Fix potentially missing __always_inline | *59463193b0phy: usb: s2 WoL wakeup_count not incremented for USB->Eth devices | *ae00848e55iommu/rockchip: fix permission bits in page table entries v2 | *a7f6ad2c42iommu/sun50i: Fix flush size | *38ccb9b469iommu/sun50i: Fix R/W permission check | *ae4ab47a0biommu/sun50i: Consider all fault sources for reset | *84fee3ce82iommu/sun50i: Fix reset release | *6f9fe31a48fs/ntfs3: Harden against integer overflows | *30f20ceb87overflow: Implement size_t saturating arithmetic helpers | *4b51f27d44fs/ntfs3: Avoid UBSAN error on true_sectors_per_clst() | *28f345bec7RDMA/siw: Fix pointer cast warning | *01d925e2a5perf stat: Do not delay the workload with --delay | *a273f1dd5dperf stat: Refactor __run_perf_stat() common code | *d21534ab4fpower: supply: fix null pointer dereferencing in power_supply_get_battery_info | *d4898d8de6power: supply: ab8500: Fix error handling in ab8500_charger_init() | *30b191798fHSI: omap_ssi_core: Fix error handling in ssi_init() | *a72fe8eb55power: supply: z2_battery: Fix possible memleak in z2_batt_probe() | *5ba0e8fa15perf symbol: correction while adjusting symbol | *a34027b63dperf trace: Handle failure when trace point folder is missed | *60aeacce64perf trace: Use macro RAW_SYSCALL_ARGS_NUM to replace number | *e4700f62dcperf trace: Return error if a system call doesn't exist | *870ad0917dpower: supply: fix residue sysfs file in error handle route of __power_supply_register() | *1c2b9c8100HSI: omap_ssi_core: fix possible memory leak in ssi_probe() | *c5f729d3d6HSI: omap_ssi_core: fix unbalanced pm_runtime_disable() | *ea37831f83fbdev: uvesafb: Fixes an error handling path in uvesafb_probe() | *5bcae36b58fbdev: uvesafb: don't build on UML | *07c1a3c2dffbdev: geode: don't build on UML | *ace8312b5dfbdev: 
ep93xx-fb: Add missing clk_disable_unprepare in ep93xxfb_probe() | *04946113fbfbdev: vermilion: decrease reference count in error path | *fc0d5034fafbdev: via: Fix error in via_core_init() | *9827246333fbdev: pm2fb: fix missing pci_disable_device() | *3aa4205134fbdev: ssd1307fb: Drop optional dependency | *4958316a6dthermal/drivers/qcom/lmh: Fix irq handler return value | *ad72205ac6thermal/drivers/qcom/temp-alarm: Fix inaccurate warning for gen2 | *37fb4e13d2thermal/drivers/imx8mm_thermal: Validate temperature range | *95c18f4a3csamples: vfio-mdev: Fix missing pci_disable_device() in mdpy_fb_probe() | *31c1b5d300ksmbd: Fix resource leak in ksmbd_session_rpc_open() | *a44102d36atracing/hist: Fix issue of losting command info in error_log | *8308ccfcbdusb: storage: Add check for kcalloc | *96c12fd0eci2c: ismt: Fix an out-of-bounds bug in ismt_access() | *8212800943i2c: mux: reg: check return value after calling platform_get_resource() | *46d8f63bb8gpiolib: protect the GPIO device against being dropped while in use by user-space | *333a271dfdgpiolib: make struct comments into real kernel docs | *7c755a2d6dgpiolib: cdev: fix NULL-pointer dereferences | *b0a26e1999gpiolib: Get rid of redundant 'else' | *4bc217b25evme: Fix error not catched in fake_init() | *31bfe024a9staging: rtl8192e: Fix potential use-after-free in rtllib_rx_Monitor() | *b0aaec894astaging: rtl8192u: Fix use after free in ieee80211_rx() | *ed4580c3f8i2c: pxa-pci: fix missing pci_disable_device() on error in ce4100_i2c_probe | *28dc61cc49chardev: fix error handling in cdev_device_add() | *43bfc7c240mcb: mcb-parse: fix error handing in chameleon_parse_gdd() | *f3686e5e8ddrivers: mcb: fix resource leak in mcb_probe() | *9d4a0aca8ausb: gadget: f_hid: fix refcount leak on error path | *d3136b7970usb: gadget: f_hid: fix f_hidg lifetime vs cdev | *a41c2bba7fusb: roles: fix of node refcount leak in usb_role_switch_is_parent() | *18b9202188coresight: trbe: remove cpuhp instance node before remove cpuhp state | 
*e854a4ab38counter: stm32-lptimer-cnt: fix the check on arr and cmp registers update | *39a156715fiio: adis: add '__adis_enable_irq()' implementation | *3a2dde8e5diio:imu:adis: Move exports into IIO_ADISLIB namespace | *3c2e13025biio: adis: stylistic changes | *de3e358912iio: adis: handle devices that cannot unmask the drdy pin | *994243de7aiio: temperature: ltc2983: make bulk write buffer DMA-safe | *22511eefa6cxl: fix possible null-ptr-deref in cxl_pci_init_afu|adapter() | *e5021bbf11cxl: fix possible null-ptr-deref in cxl_guest_init_afu|adapter() | *b308fdedeffirmware: raspberrypi: fix possible memory leak in rpi_firmware_probe() | *d5c8f9003amisc: sgi-gru: fix use-after-free error in gru_set_context_option, gru_fault and gru_handle_user_call_os | *848c45964dmisc: tifm: fix possible memory leak in tifm_7xx1_switch_media() | *37a13b274eocxl: fix pci device refcount leak when calling get_function_0() | *3299983a6bmisc: ocxl: fix possible name leak in ocxl_file_register_afu() | *357379d504test_firmware: fix memory leak in test_firmware_init() | *07d547d742serial: sunsab: Fix error handling in sunsab_init() | *919e745fddserial: altera_uart: fix locking in polling mode | *e1c4f18214tty: serial: altera_uart_{r,t}x_chars() need only uart_port | *b133b45ba6tty: serial: clean up stop-tx part in altera_uart_tx_chars() | *6f7d82380fserial: pch: Fix PCI device refcount leak in pch_request_dma() | *0dfc7dfe5bserial: stm32: move dma_request_chan() before clk_prepare_enable() | *926b0967f7serial: pl011: Do not clear RX FIFO & RX interrupt in unthrottle. 
| *d71a611fcaserial: amba-pl011: avoid SBSA UART accessing DMACR register | *f46f9d2e16extcon: usbc-tusb320: Update state on probe even if no IRQ pending | *ac067e75c4extcon: usbc-tusb320: Add USB TYPE-C support | *9280761167extcon: usbc-tusb320: Factor out extcon into dedicated functions | *05aa8ff22dusb: typec: Factor out non-PD fwnode properties | *31e9c27510extcon: usbc-tusb320: Add support for TUSB320L | *b9c8820d91extcon: usbc-tusb320: Add support for mode setting and reset | *4524599a6ausb: typec: tipd: Fix spurious fwnode_handle_put in error path | *b0d86eacc8usb: typec: tipd: Cleanup resources if devm_tps6598_psy_register fails | *ba75be6f0dusb: typec: tcpci: fix of node refcount leak in tcpci_register_port() | *154d5713a2usb: typec: Check for ops->exit instead of ops->enter in altmode_exit | *1f5661388fstaging: vme_user: Fix possible UAF in tsi148_dma_list_add | *a3c4bc2616usb: fotg210-udc: Fix ages old endianness issues | *5e87d41221uio: uio_dmem_genirq: Fix deadlock between irq config and handling | *79a4bdb6b9uio: uio_dmem_genirq: Fix missing unlock in irq configuration | *3f22a273efvfio: platform: Do not pass return buffer to ACPI _RST method | *417ef049e3class: fix possible memory leak in __class_register() | *f76824ab2bserial: 8250_bcm7271: Fix error handling in brcmuart_init() | *6b4424efcfserial: tegra: Read DMA status before terminating | *a0ead7e8dadrivers: dio: fix possible memory leak in dio_init() | *e8985caf05RISC-V: Align the shadow stack | *ca48174a76IB/IPoIB: Fix queue count inconsistency for PKEY child interfaces | *82bd423ed9hwrng: geode - Fix PCI device refcount leak | *2b79a5e560hwrng: amd - Fix PCI device refcount leak | *42cbff35f4crypto: img-hash - Fix variable dereferenced before check 'hdev->req' | *b9634f99b6RDMA/hns: Fix error code of CMD | *b06bb747ceRDMA/hns: Fix page size cap from firmware | *4c05c7cf25RDMA/hns: Fix PBL page MTR find | *fa267c4192RDMA/hns: Fix AH attr queried by query_qp | *e27fb26e75orangefs: Fix sysfs not 
cleanup when dev init failed | *3e9c395ef2PCI: mt7621: Add sentinel to quirks table | *bcc65c2e2aPCI: mt7621: Rename mt7621_pci_ to mt7621_pcie_ | *0a7eab1cc4RDMA/srp: Fix error return code in srp_parse_options() | *6301100179RDMA/hfi1: Fix error return code in parse_platform_config() | *339ca035afriscv/mm: add arch hook arch_clear_hugepage_flags | *20d363dcd6crypto: omap-sham - Use pm_runtime_resume_and_get() in omap_sham_probe() | *815b65d714crypto: amlogic - Remove kcalloc without check | *af71199291RDMA/nldev: Fix failure to send large messages | *bb895786a4f2fs: avoid victim selection from previous victim section | *655e955debRDMA/nldev: Add checks for nla_nest_start() in fill_stat_counter_qps() | *1895e908b3scsi: snic: Fix possible UAF in snic_tgt_create() | *09a60f908dscsi: fcoe: Fix transport not deattached when fcoe_if_init() fails | *e59da17205scsi: ipr: Fix WARNING in ipr_init() | *c444f58fdascsi: scsi_debug: Fix possible name leak in sdebug_add_host_helper() | *4e4968dfb5scsi: fcoe: Fix possible name leak when device_register() fails | *0f5006d7d1scsi: scsi_debug: Fix a warning in resp_report_zones() | *2432719b1ascsi: scsi_debug: Fix a warning in resp_verify() | *038359eeccscsi: efct: Fix possible memleak in efct_device_init() | *23053a7926scsi: hpsa: Fix possible memory leak in hpsa_add_sas_device() | *2ab6d5927cscsi: hpsa: Fix error handling in hpsa_add_sas_host() | *6a92129c8fscsi: mpt3sas: Fix possible resource leaks in mpt3sas_transport_port_add() | *26c0f7e1acpadata: Fix list iterator in padata_do_serial() | *17afa98bccpadata: Always leave BHs disabled when running ->parallel() | *221afb2a1bcrypto: tcrypt - Fix multibuffer skcipher speed test mem leak | *bfe10a1d9fscsi: hpsa: Fix possible memory leak in hpsa_init_one() | *38ef0c0b09dt-bindings: visconti-pcie: Fix interrupts array max constraints | *83aad8111bdt-bindings: imx6q-pcie: Fix clock names for imx6sx and imx8mq | *f64f08b9e6RDMA/rxe: Fix NULL-ptr-deref in rxe_qp_do_cleanup() when socket 
create failed | *35f9cd060eRDMA/hns: fix memory leak in hns_roce_alloc_mr() | *6d5220a553crypto: ccree - Make cc_debugfs_global_fini() available for module init function | *2e9cf3e783RDMA/hfi: Decrease PCI device reference count in error path | *7f476d639cPCI: Check for alloc failure in pci_request_irq() | *49bc2be897RDMA/hns: Fix ext_sge num error when post send | *0e6160d79dRDMA/hns: Repacing 'dseg_len' by macros in fill_ext_sge_inl_data() | *e5ea48788ecrypto: hisilicon/qm - add missing pci_dev_put() in q_num_set() | *442caec12fcrypto: cryptd - Use request context instead of stack for sub-request | *ab677729fccrypto: ccree - Remove debugfs when platform_driver_register failed | *0328ca389ascsi: scsi_debug: Fix a warning in resp_write_scat() | *1ba8ecb664RDMA/siw: Set defined status for work completion with undefined status | *6e757005baRDMA/nldev: Return "-EAGAIN" if the cm_id isn't from expected port | *f981c697b2RDMA/core: Make sure "ib_port" is valid when access sysfs node | *13586753aeRDMA/restrack: Release MR restrack when delete | *6e78ca677fPCI: vmd: Disable MSI remapping after suspend | *47e31b86edIB/mad: Don't call to function that might sleep while in atomic context | *f8d8fbd3b6RDMA/siw: Fix immediate work request flush to completion queue | *2a26849d79scsi: qla2xxx: Fix set-but-not-used variable warnings | *799ed37559RDMA/irdma: Report the correct link speed | *d40d1b1c61f2fs: fix to destroy sbi->post_read_wq in error path of f2fs_fill_super() | *847f725006f2fs: fix normal discard process | *865bb7b5a7f2fs: fix to invalidate dcc->f2fs_issue_discard in error path | *5f509fa740apparmor: Fix memleak in alloc_ns() | *46f3cb83e4crypto: rockchip - rework by using crypto_engine | *3ed0548d39crypto: rockchip - remove non-aligned handling | *5562009f5fcrypto: rockchip - better handle cipher key | *26f3971356crypto: rockchip - add fallback for ahash | *34fe54af3ccrypto: rockchip - add fallback for cipher | *314217591ecrypto: rockchip - do not store mode 
globally | *853cd97d2bcrypto: rockchip - do not do custom power management | *d5100272e4f2fs: Fix the race condition of resize flag between resizefs | *db72c5dffcPCI: pci-epf-test: Register notifier if only core_init_notifier is enabled | *26ffeff67bRDMA/core: Fix order of nldev_exit call | *a00a7ac251PCI: dwc: Fix n_fts[] array overrun | *10ae636115apparmor: Use pointer to struct aa_label for lbs_cred | *8d50ccfbe2scsi: core: Fix a race between scsi_done() and scsi_timeout() | *9bdf3a59b3crypto: nitrox - avoid double free on error path in nitrox_sriov_init() | *7efc0d39eecrypto: sun8i-ss - use dma_addr instead u32 | *aaef0bdd7acrypto: hisilicon/qm - fix missing destroy qp_idr | *d567776ae2apparmor: Fix abi check to include v8 abi | *bc9d2cbbdcapparmor: fix lockdep warning when removing a namespace | *775a37ffa9apparmor: fix a memleak in multi_transaction_new() | *09f30f394enet: dsa: tag_8021q: avoid leaking ctx on dsa_tag_8021q_register() error path | *86664b8652i40e: Fix the inability to attach XDP program on downed interface | *0abd337acdstmmac: fix potential division by 0 | *93a4a04558Bluetooth: RFCOMM: don't call kfree_skb() under spin_lock_irqsave() | *8d6bbe5241Bluetooth: hci_core: don't call kfree_skb() under spin_lock_irqsave() | *804de4e24aBluetooth: hci_bcsp: don't call kfree_skb() under spin_lock_irqsave() | *1030c3aeeeBluetooth: hci_h5: don't call kfree_skb() under spin_lock_irqsave() | *9fcb5b367eBluetooth: hci_ll: don't call kfree_skb() under spin_lock_irqsave() | *14cc94a598Bluetooth: hci_qca: don't call kfree_skb() under spin_lock_irqsave() | *06467130d5Bluetooth: btusb: don't call kfree_skb() under spin_lock_irqsave() | *e52b7d460aBluetooth: btintel: Fix missing free skb in btintel_setup_combined() | *f7c9de3bcfBluetooth: MGMT: Fix error report for ADD_EXT_ADV_PARAMS | *2addf3cb63sctp: sysctl: make extra pointers netns aware | *21296a52cantb_netdev: Use dev_kfree_skb_any() in interrupt context | *0fff763f11net: lan9303: Fix read error execution 
path | *882bad40a0can: tcan4x5x: Fix use of register error status mask | *d50092f662can: m_can: Call the RAM init directly from m_can_chip_config | *55064642aacan: tcan4x5x: Remove invalid write in clear_interrupts | *641eef8766net: amd-xgbe: Check only the minimum speed for active/passive cables | *60b35e28dcnet: amd-xgbe: Fix logic around active and passive cables | *d436bf39f4af_unix: call proto_unregister() in the error path in af_unix_init() | *ee9d03bf89net: amd: lance: don't call dev_kfree_skb() under spin_lock_irqsave() | *6f1c4c01cchamradio: don't call dev_kfree_skb() under spin_lock_irqsave() | *eb2c6a6e8fnet: ethernet: dnet: don't call dev_kfree_skb() under spin_lock_irqsave() | *ef08e1082cnet: emaclite: don't call dev_kfree_skb() under spin_lock_irqsave() | *2786ef4066net: apple: bmac: don't call dev_kfree_skb() under spin_lock_irqsave() | *d81314e2ddnet: apple: mace: don't call dev_kfree_skb() under spin_lock_irqsave() | *9a6544343bnet/tunnel: wait until all sk_user_data reader finish before releasing the sock | *998b4e54f5net: farsync: Fix kmemleak when rmmods farsync | *71605c6906ethernet: s2io: don't call dev_kfree_skb() under spin_lock_irqsave() | *ce1b3a41e7of: overlay: fix null pointer dereferencing in find_dup_cset_node_entry() and find_dup_cset_prop() | *8399b98935drivers: net: qlcnic: Fix potential memory leak in qlcnic_sriov_init() | *96e5089702net: stmmac: fix possible memory leak in stmmac_dvr_probe() | *ecaf934e44net: stmmac: selftests: fix potential memleak in stmmac_test_arpoffload() | *e1359bc90anet: defxx: Fix missing err handling in dfx_init() | *c65603abc3net: vmw_vsock: vmci: Check memcpy_from_msg() | *9de42116fcclk: socfpga: Fix memory leak in socfpga_gate_init() | *e515881adebpf: Do not zero-extend kfunc return values | *ce61a877c7blktrace: Fix output non-blktrace event when blk_classic option enabled | *f2ae56fa0bwifi: brcmfmac: Fix error return code in brcmf_sdio_download_firmware() | *23060daf37wifi: rtl8xxxu: Fix the channel 
width reporting | *6d0e00334ewifi: rtl8xxxu: Add __packed to struct rtl8723bu_c2h | *e69d380650spi: spi-gpio: Don't set MOSI as an input if not 3WIRE mode | *4e501a31afclk: samsung: Fix memory leak in _samsung_clk_register_pll() | *441c05485cmedia: coda: Add check for kmalloc | *b99872178emedia: coda: Add check for dcoda_iram_alloc | *fbf081ebe2media: c8sectpfe: Add of_node_put() when breaking out of loop | *2a7330d820regulator: qcom-labibb: Fix missing of_node_put() in qcom_labibb_regulator_probe() | *ecf1b317a8mmc: core: Normalize the error handling branch in sd_read_ext_regs() | *7fecca429ememstick/ms_block: Add check for alloc_ordered_workqueue | *b77ced3fcememstick: ms_block: Add error handling support for add_disk() | *ae00eb6779mmc: renesas_sdhi: alway populate SCC pointer | *88fa6a4e39mmc: mmci: fix return value check of mmc_add_host() | *29c3690969mmc: wbsd: fix return value check of mmc_add_host() | *0959cc1685mmc: via-sdmmc: fix return value check of mmc_add_host() | *e0cfe7aa41mmc: meson-gx: fix return value check of mmc_add_host() | *62005dfcc3mmc: omap_hsmmc: fix return value check of mmc_add_host() | *1925472decmmc: atmel-mci: fix return value check of mmc_add_host() | *58c3a8d0f1mmc: wmt-sdmmc: fix return value check of mmc_add_host() | *afc898019emmc: vub300: fix return value check of mmc_add_host() | *6444079767mmc: toshsd: fix return value check of mmc_add_host() | *df683201c7mmc: rtsx_usb_sdmmc: fix return value check of mmc_add_host() | *30dc645461mmc: rtsx_pci: fix return value check of mmc_add_host() | *bc7e8744f5mmc: pxamci: fix return value check of mmc_add_host() | *2d496050demmc: mxcmmc: fix return value check of mmc_add_host() | *f0502fe86ammc: moxart: fix return value check of mmc_add_host() | *29c5b4da41mmc: alcor: fix return value check of mmc_add_host() | *52e0d8a8ddriscv, bpf: Emit fixed-length instructions for BPF_PSEUDO_FUNC | *0de70ed675NFSv4.x: Fail client initialisation if state manager thread can't run | *7055c878a0SUNRPC: Fix 
missing release socket in rpc_sockname() | *79d4cd40daxprtrdma: Fix regbuf data not freed in rpcrdma_req_create() | *cba633b24aALSA: mts64: fix possible null-ptr-defer in snd_mts64_interrupt | *9018550d96media: saa7164: fix missing pci_disable_device() | *2df1e2a6ecALSA: pcm: Set missing stop_operating flag at undoing trigger start | *a443c55d96bpf, sockmap: fix race in sock_map_free() | *5229b90337hwmon: (jc42) Restore the min/max/critical temperatures on resume | *785f5c732ahwmon: (jc42) Convert register access and caching to regmap/regcache | *c4c64d8abdregulator: core: fix resource leak in regulator_register() | *07f82dca11configfs: fix possible memory leak in configfs_create_dir() | *21a061772bhsr: Synchronize sequence number updates. | *a82f5b2e08hsr: Synchronize sending frames to have always incremented outgoing seq nr. | *bb3b40cd6ahsr: Disable netpoll. | *8e148d981bhsr: Avoid double remove of a node. | *9387cbf7f7hsr: Add a rcu-read lock to hsr_forward_skb(). | *a051e10bfcclk: qcom: clk-krait: fix wrong div2 functions | *8275c7465dclk: qcom: lpass-sc7180: Fix pm_runtime usage | *91657ec4d0regulator: core: fix module refcount leak in set_supply() | *66976a3be9wifi: mt76: fix coverity overrun-call in mt76_get_txpower() | *a21e3f6f41wifi: mt76: mt7921: fix reporting of TX AGGR histogram | *c8659018b6mt76: stop the radar detector after leaving dfs channel | *ae19622e7fwifi: cfg80211: Fix not unregister reg_pdev when load_builtin_regdb_keys() fails | *2e32f12998wifi: mac80211: fix memory leak in ieee80211_if_add() | *f58888434dspi: spidev: mask SPI_CS_HIGH in SPI_IOC_RD_MODE | *b6d27d9250bonding: uninitialized variable in bond_miimon_inspect() | *7201e4f4f5bpf, sockmap: Fix data loss caused by using apply_bytes on ingress redirect | *6105ed3598bpf, sockmap: Fix missing BPF_F_INGRESS flag when using apply_bytes | *8786bde11abpf, sockmap: Fix repeated calls to sock_put() when msg has more_data | *a222f992ceInput: wistron_btns - disable on UML | 
*d78649c21bnetfilter: conntrack: set icmpv6 redirects as RELATED | *09fe3b1392ASoC: pcm512x: Fix PM disable depth imbalance in pcm512x_probe | *8876793e56drm/amdkfd: Fix memory leakage | *8f2d2badf8drm/amdgpu: Fix PCI device refcount leak in amdgpu_atrm_get_bios() | *88c6e0995cdrm/radeon: Fix PCI device refcount leak in radeon_atrm_get_bios() | *0af0ff9fc0drm/amd/pm/smu11: BACO is supported when it's in BACO state | *27e7cf595dASoC: mediatek: mt8173: Enable IRQ when pdata is ready | *905e565375ASoC: mediatek: mt8173: Fix debugfs registration for components | *d8e32f1bf1wifi: iwlwifi: mvm: fix double free on tx path. | *d0bb44775cALSA: asihpi: fix missing pci_disable_device() | *f12377abacNFS: Fix an Oops in nfs_d_automount() | *9a96aff53cNFSv4: Fix a deadlock between nfs4_open_recover_helper() and delegreturn | *c6aca4c7baNFSv4: Fix a credential leak in _nfs4_discover_trunking() | *7f6607c884NFSv4.2: Fix initialisation of struct nfs4_label | *51899eefd1NFSv4.2: Fix a memory stomp in decode_attr_security_label | *34dffc77ddNFSv4.2: Clear FATTR4_WORD2_SECURITY_LABEL when done decoding | *d926611c89ASoC: mediatek: mtk-btcvsd: Add checks for write and read of mtk_btcvsd_snd | *f243ff92d6ASoC: dt-bindings: wcd9335: fix reset line polarity in example | *41d7b8291cdrm/tegra: Add missing clk_disable_unprepare() in tegra_dc_probe() | *2376d7fa08media: s5p-mfc: Add variant data for MFC v7 hardware for Exynos 3250 SoC | *210fcf64bemedia: dvb-usb: az6027: fix null-ptr-deref in az6027_i2c_xfer() | *b223cc15f9media: dvb-core: Fix ignored return value in dvb_register_frontend() | *825a8af31dpinctrl: pinconf-generic: add missing of_node_put() | *eedc698d66clk: imx8mn: fix imx8mn_enet_phy_sels clocks list | *f86a432604clk: imx8mn: fix imx8mn_sai2_sels clocks list | *5e98c3a345clk: imx: replace osc_hdmi with dummy | *9453e097b8clk: imx8mn: rename vpu_pll to m7_alt_pll | *bffc80bac8media: imon: fix a race condition in send_packet() | *9c9ff35d68media: vimc: Fix wrong function called 
when vimc_init() fails | *f849c116d3ASoC: qcom: Add checks for devm_kcalloc | *16437645dddrbd: destroy workqueue when drbd device was freed | *cdaf45415cdrbd: remove call to memset before free device/resource/connection | *f35981083cmtd: maps: pxa2xx-flash: fix memory leak in probe | *87c750affdbonding: fix link recovery in mode 2 when updelay is nonzero | *02105f0b30drm/amdgpu: fix pci device refcount leak | *5b0a1f1247clk: rockchip: Fix memory leak in rockchip_clk_register_pll() | *27aac5c012regulator: core: use kfree_const() to free space conditionally | *a69b1faa9bALSA: seq: fix undefined behavior in bit shift for SNDRV_SEQ_FILTER_USE_EVENT | *9c0f3617baALSA: pcm: fix undefined behavior in bit shift for SNDRV_PCM_RATE_KNOT | *6159424e2dpinctrl: k210: call of_node_put() | *18a973fcb1HID: hid-sensor-custom: set fixed size for custom attributes | *0fc4280dbebpf: Move skb->len == 0 checks into __bpf_redirect | *8dbcb4c284mtd: spi-nor: Fix the number of bytes for the dummy cycles | *58e1a0ef52mtd: spi-nor: hide jedec_id sysfs attribute if not present | *348d95e39finet: add READ_ONCE(sk->sk_bound_dev_if) in inet_csk_bind_conflict() | *4451bef1a3media: videobuf-dma-contig: use dma_mmap_coherent | *b2781a8626media: platform: exynos4-is: Fix error handling in fimc_md_init() | *7cf71bbe5dmedia: solo6x10: fix possible memory leak in solo_sysfs_init() | *c290aa527fmedia: vidtv: Fix use-after-free in vidtv_bridge_dvb_init() | *648f303102Input: elants_i2c - properly handle the reset GPIO when power is off | *e0d3e46ac6mtd: lpddr2_nvm: Fix possible null-ptr-deref | *ab4e42f519drm/msm/a6xx: Fix speed-bin detection vs probe-defer | *fea795f7c7wifi: ath10k: Fix return value in ath10k_pci_init() | *77482c4dd4block: clear ->slave_dir when dropping the main slave_dir reference | *62251948e2ima: Fix misuse of dereference of pointer in template_desc_init_fields() | *29d6c69ba4integrity: Fix memory leakage in keyring allocation error path | *8e6df95717drm/fourcc: Fix vsub/hsub for 
Q410 and Q401 | *ec1727f89edrm/fourcc: Add packed 10bit YUV 4:2:0 format | *f72608b8ddregulator: qcom-rpmh: Fix PMR735a S3 regulator spec | *63d011ad05nvme: return err on nvme_init_non_mdts_limits fail | *f289a38df0amdgpu/pm: prevent array underflow in vega20_odn_edit_dpm_table() | *cda1895f3bregulator: core: fix unbalanced of node refcount in regulator_dev_lookup() | *1a5aaa5736nvmet: only allocate a single slab for bvecs | *cb3033a432libbpf: Fix uninitialized warning in btf_dump_dump_type_data | *83baa50939ASoC: pxa: fix null-pointer dereference in filter() | *a06ba0f7f8drm/mediatek: Modify dpi power on/off sequence. | *6d25bc6370drm/radeon: Add the missed acpi_put_table() to fix memory leak | *4cf11e9d31bfq: fix waker_bfqq inconsistency crash | *55e822212erxrpc: Fix ack.bufferSize to be 0 when generating an ack | *5ef8bf0df1net, proc: Provide PROC_FS=n fallback for proc_create_net_single_write() | *d1c44928bbmedia: camss: Clean up received buffers on failed start of streaming | *3b4b4df3f8wifi: rsi: Fix handling of 802.3 EAPOL frames sent via control port | *9e1440c858Input: joystick - fix Kconfig warning for JOYSTICK_ADC | *71212d7318mtd: Fix device name leak when register device failed in add_mtd_device() | *106311677bclk: qcom: gcc-sm8250: Use retention mode for USB GDSCs | *322c7415e7bpf: propagate precision across all frames, not just the last one | *07c286c10abpf: Check the other end of slot_type for STACK_SPILL | *fdbc363bc1bpf: propagate precision in ALU/ALU64 operations | *b29e46610cmedia: platform: exynos4-is: fix return value check in fimc_md_probe() | *ab54081a28media: vivid: fix compose size exceed boundary | *3c58c83c6fbpf: Fix slot type check in check_stack_write_var_off | *cffa75198cdrm/msm/hdmi: use devres helper for runtime PM management | *58d002b72edrm/msm/hdmi: drop unused GPIO support | *2d4bc60693ima: Handle -ESTALE returned by ima_filter_rule_match() | *13fc167e16drm/panel/panel-sitronix-st7701: Remove panel on DSI attach failure | 
*c20672cfa0spi: Update reference to struct spi_controller | *2858d038c5clk: renesas: r9a06g032: Repair grave increment error | *f6ed73db39drm/rockchip: lvds: fix PM usage counter unbalance in poweron | *13fab6322bcan: kvaser_usb: Compare requested bittiming parameters with actual parameters in do_set_{,data}_bittiming | *4e55d61e87can: kvaser_usb: Add struct kvaser_usb_busparams | *fcfd4df200can: kvaser_usb_leaf: Fix bogus restart events | *51f07da38bcan: kvaser_usb_leaf: Fix wrong CAN state after stopping | *647c26887bcan: kvaser_usb_leaf: Fix improved state not being reported | *9676d65a4acan: kvaser_usb: make use of units.h in assignment of frequency | *c761108562can: kvaser_usb_leaf: Set Warning state even without bus errors | *a60bf9d814can: kvaser_usb: kvaser_usb_leaf: Handle CMD_ERROR_EVENT | *8aae6bddc1can: kvaser_usb: kvaser_usb_leaf: Rename {leaf,usbcan}_cmd_error_event to {leaf,usbcan}_cmd_can_error_event | *972270be24can: kvaser_usb: kvaser_usb_leaf: Get capabilities from device | *e9e0d9945fcan: kvaser_usb: do not increase tx statistics when sending error message frames | *e39bce64e5libbpf: Btf dedup identical struct test needs check for nested structs/arrays | *d4419f93e2media: exynos4-is: don't rely on the v4l2_async_subdev internals | *8741792d82soreuseport: Fix socket selection for SO_INCOMING_CPU. 
| *094f56192cvenus: pm_helpers: Fix error check in vcodec_domains_get() | *3c793a9ad9media: i2c: ad5820: Fix error path | *07611f9e44media: adv748x: afe: Select input port when initializing AFE | *aa81257dbfmedia: coda: jpeg: Add check for kmalloc | *9a402adc9fmedia: v4l2-ctrls: Fix off-by-one error in integer menu control check | *1caed03305drm/amdgpu/powerplay/psm: Fix memory leak in power state init | *f66a877083ipmi: kcs: Poll OBF briefly to reduce OBE latency | *983320199eata: libata: fix NCQ autosense logic | *a9caf71aebata: add/use ata_taskfile::{error|status} fields | *3483c3fb48ata: libata: move ata_{port,link,dev}_dbg to standard pr_XXX() macros | *6706135577libbpf: Fix null-pointer dereference in find_prog_by_sec_insn() | *a733bf1019libbpf: Fix use-after-free in btf_dump_name_dups | *b5ec2a04fedrm/bridge: adv7533: remove dynamic lane switching from adv7533 bridge | *6d40a49d05wifi: rtl8xxxu: Fix reading the vendor of combo chips | *355f16f756wifi: ath9k: hif_usb: Fix use-after-free in ath9k_hif_usb_reg_in_cb() | *d856f7574bwifi: ath9k: hif_usb: fix memory leak of urbs in ath9k_hif_usb_dealloc_tx_urbs() | *12229a2523platform/mellanox: mlxbf-pmc: Fix event typo | *a0d93aac54rapidio: devices: fix missing put_device in mport_cdev_open | *7af9cb8cbbhfs: Fix OOB Write in hfs_asc2mac | *90962b3b1crelay: fix type mismatch when allocating memory in relay_create_buf() | *0d60b11f8feventfd: change int to __u64 in eventfd_signal() ifndef CONFIG_EVENTFD | *2f5cc7fd73rapidio: fix possible UAF when kfifo_alloc() fails | *337b68da68fs: sysv: Fix sysv_nblocks() returns wrong value | *95d42a8d3dlockd: set other missing fields when unlocking files | *318229b4d3MIPS: OCTEON: warn only once if deprecated link status is being used | *5e6d37a93aMIPS: BCM63xx: Add check for NULL for clk in clk_enable | *50af0ba3e1platform/x86: intel_scu_ipc: fix possible name leak in __intel_scu_ipc_register() | *3cf8150135platform/x86: mxm-wmi: fix memleak in mxm_wmi_call_mx[ds|mx]() | 
*0ceadb5a3eplatform/chrome: cros_ec_typec: zero out stale pointers | *49c98b5688platform/chrome: cros_ec_typec: Cleanup switch handle return paths | *b55ef8508aPM: runtime: Do not call __rpm_callback() from rpm_idle() | *0bf874183bxen/privcmd: Fix a possible warning in privcmd_ioctl_mmap_resource() | *70966d6b0fx86/xen: Fix memory leak in xen_init_lock_cpu() | *23aef94eeax86/xen: Fix memory leak in xen_smp_intr_init{_pv}() | *03ab1c5c2fuprobes/x86: Allow to probe a NOP instruction with 0x66 prefix | *6fde666278ACPICA: Fix use-after-free in acpi_ut_copy_ipackage_to_ipackage() | *9cabd5f4f1clocksource/drivers/timer-ti-dm: Fix missing clk_disable_unprepare in dmtimer_systimer_init_clock() | *b73c76c3c4cpu/hotplug: Do not bail-out in DYING/STARTING sections | *6eb1802184cpu/hotplug: Make target_store() a nop when target == state | *cd130e2676futex: Resend potentially swallowed owner death notification | *fd8a10d44cfutex: Move to kernel/futex/ | *156144bd18mips: ralink: mt7621: do not use kzalloc too early | *186d59bb6amips: ralink: mt7621: soc queries and tests as functions | *8348da01e5mips: ralink: mt7621: define MT7621_SYSC_BASE with __iomem | *0f8e6fe09cclocksource/drivers/sh_cmt: Access registers according to spec | *a47de2fd3frapidio: rio: fix possible name leak in rio_register_mport() | *ec3f04f74frapidio: fix possible name leaks when rio_add_device() fails | *4662d8e6abdebugfs: fix error when writing negative value to atomic_t debugfs file | *7e8e8cc136lib/notifier-error-inject: fix error when writing -errno to debugfs file | *39b5e6130blibfs: add DEFINE_SIMPLE_ATTRIBUTE_SIGNED for signed value | *19c202e6e5cpufreq: amd_freq_sensitivity: Add missing pci_dev_put() | *93e3c80338genirq/irqdesc: Don't try to remove non-existing sysfs files | *435cc7d18cnfsd: don't call nfsd_file_put from client states seqfile display | *5030d4d2bfNFSD: Finish converting the NFSv2 GETACL result encoder | *e498675e06SUNRPC: Return true/false (not 1/0) from bool functions | 
*3e255dc210EDAC/i10nm: fix refcount leak in pci_get_dev_wrapper() | *740efb64cairqchip/wpcm450: Fix memory leak in wpcm450_aic_of_init() | *77b99b483firqchip: gic-pm: Use pm_runtime_resume_and_get() in gic_probe() | *5139cbc0c6thermal: core: fix some possible name leaks in error paths | *cab345f9d5platform/chrome: cros_usbpd_notify: Fix error handling in cros_usbpd_notify_init() | *0358bc7cc2perf/x86/intel/uncore: Fix reference count leak in __uncore_imc_init_box() | *433bd587dcperf/x86/intel/uncore: Fix reference count leak in snr_uncore_mmio_map() | *3485f19751perf/x86/intel/uncore: Fix reference count leak in hswep_has_limit_sbox() | *0021ef7dc6perf/x86/intel/uncore: Fix reference count leak in sad_cfg_iio_topology() | *c12b314bb2PNP: fix name memory leak in pnp_alloc_dev() | *f1c7a6af71selftests/efivarfs: Add checking of the test return value | *46be3ee1caMIPS: vpe-cmp: fix possible memory leak while module exiting | *e820a8192fMIPS: vpe-mt: fix possible memory leak while module exiting | *61d68cf2baocfs2: fix memory leak in ocfs2_stack_glue_init() | *e83b47580alib/fonts: fix undefined behavior in bit shift for get_default_font | *0df7d9ab6bproc: fixup uptime selftest | *07b8659b8etimerqueue: Use rb_entry_safe() in timerqueue_getnext() | *413b18866bplatform/x86: huawei-wmi: fix return value calculation | *4b46932283lib/debugobjects: fix stat count and optimize debug_objects_mem_init | *f790dfe816perf: Fix possible memleak in pmu_dev_alloc() | *418d21c0dfselftests/ftrace: event_triggers: wait longer for test_event_enable | *4ea765b106cpufreq: qcom-hw: Fix memory leak in qcom_cpufreq_hw_read_lut() | *c52d9c25d9fs: don't audit the capability check in simple_xattr_list() | *e4d0d13b46PM: hibernate: Fix mistake in kerneldoc comment | *1f62b8e50dx86/sgx: Reduce delay and interference of enclave release | *f5b88170f0alpha: fix syscall entry in !AUDUT_SYSCALL case | *a819ba80b9alpha: fix TIF_NOTIFY_SIGNAL handling | *eb2a732ef4cpuidle: dt: Return the correct numbers of 
parsed idle states | *3af4f5cb8asched/uclamp: Make asym_fits_capacity() use util_fits_cpu() | *23cb580e0csched/core: Introduce sched_asym_cpucap_active() | *41c2dba388sched/fair: Removed useless update of p->recent_used_cpu | *55ffeab089sched/uclamp: Make select_idle_capacity() use util_fits_cpu() | *4639bfbb83sched/uclamp: Make task_fits_capacity() use util_fits_cpu() | *309e50cbfesched/uclamp: Fix relationship between uclamp and migration margin | *54a766e196sched/fair: Cleanup task_util and capacity type | *26bffaf678ovl: remove privs in ovl_fallocate() | *5dc34f9aaaovl: remove privs in ovl_copyfile() | *9636e70ee2ovl: use ovl_copy_{real,upper}attr() wrappers | *a54843833covl: store lower path in ovl_inode | *163c5bbe7dtpm/tpm_crb: Fix error message in __crb_relinquish_locality() | *fe880e9df9tpm/tpm_ftpm_tee: Fix error handling in ftpm_mod_init() | *ebc73c4f26pstore: Avoid kcore oops by vmap()ing with VM_IOREMAP | *d4dcde11bfARM: mmp: fix timer_read delay | *95916147dcpstore/ram: Fix error return code in ramoops_probe() | *a31a647a3dseccomp: Move copy_seccomp() to no failure path. 
| *b8b76b8da6arm64: dts: armada-3720-turris-mox: Add missing interrupt for RTC | *820a5ccca7ARM: dts: turris-omnia: Add switch port 6 node | *b311f8e9f5ARM: dts: turris-omnia: Add ethernet aliases | *48ebdd06c9ARM: dts: armada-39x: Fix assigned-addresses for every PCIe Root Port | *f27dd04e44ARM: dts: armada-38x: Fix assigned-addresses for every PCIe Root Port | *1e53c63da8ARM: dts: armada-375: Fix assigned-addresses for every PCIe Root Port | *3af1a73e9eARM: dts: armada-xp: Fix assigned-addresses for every PCIe Root Port | *e4ed8133c4ARM: dts: armada-370: Fix assigned-addresses for every PCIe Root Port | *b335b6344eARM: dts: dove: Fix assigned-addresses for every PCIe Root Port | *5b3415e683arm64: dts: mediatek: mt6797: Fix 26M oscillator unit name | *93f5e66496arm64: dts: mediatek: pumpkin-common: Fix devicetree warnings | *debd938e21arm64: dts: mt2712-evb: Fix usb vbus regulators unit names | *b2c6397754arm64: dts: mt2712-evb: Fix vproc fixed regulators unit names | *96c972f835arm64: dts: mt2712e: Fix unit address for pinctrl node | *2cd1391c28arm64: dts: mt2712e: Fix unit_address_vs_reg warning for oscillators | *39877a3636arm64: dts: mt6779: Fix devicetree build warnings | *af431ce47eARM: dts: nuvoton: Remove bogus unit addresses from fixed-partition nodes | *0a616049ecarm64: dts: ti: k3-j721e-main: Drop dma-coherent in crypto node | *22a740824aarm64: dts: ti: k3-am65-main: Drop dma-coherent in crypto node | *b131304fe7perf/smmuv3: Fix hotplug callback leak in arm_smmu_pmu_init() | *b99fbe8d94perf/arm_dmc620: Fix hotplug callback leak in dmc620_pmu_init() | *9285b623bbperf: arm_dsu: Fix hotplug callback leak in dsu_pmu_init() | *e6318a7e19arm64: mm: kfence: only handle translation faults | *46ddfb9d1earm64: Treat ESR_ELx as a 64-bit register | *681e340128soc: ti: smartreflex: Fix PM disable depth imbalance in omap_sr_probe | *6eca7a2535soc: ti: knav_qmss_queue: Fix PM disable depth imbalance in knav_queue_probe | *972f8fc065soc: ti: knav_qmss_queue: Use 
pm_runtime_resume_and_get instead of pm_runtime_get_sync | *fe53048f2aarm: dts: spear600: Fix clcd interrupt | *75baeec464arm64: dts: qcom: sm6125: fix SDHCI CQE reg names | *0f9ac04191soc: qcom: apr: Add check for idr_alloc and of_property_read_string_index | *6855dd02c5soc: qcom: apr: make code more reuseable | *c9fb81a835arm64: dts: qcom: sm8250: drop bogus DP PHY clock | *53ffa57464arm64: dts: qcom: sm8350: fix UFS PHY registers | *d5a6bbd7a2arm64: dts: qcom: sm8250: fix UFS PHY registers | *3a52ff845farm64: dts: qcom: sm8150: fix UFS PHY registers | *800f8165e0arm64: dts: qcom: Correct QMP PHY child node name | *ee136f275bsoc: qcom: llcc: make irq truly optional | *aa7ffd4174arm64: dts: qcom: sm8250: correct LPASS pin pull down | *f94bacc616arm64: dts: qcom: pm660: Use unique ADC5_VCOIN address in node name | *d5bf119781drivers: soc: ti: knav_qmss_queue: Mark knav_acc_firmwares as static | *4707d5daf8ARM: dts: stm32: Fix AV96 WLAN regulator gpio property | *33647d7a46ARM: dts: stm32: Drop stm32mp15xc.dtsi from Avenger96 | *9f271a8660objtool, kcsan: Add volatile read/write instrumentation to whitelist | *51fe2dcba8arm64: dts: qcom: msm8916: Drop MSS fallback compatible | *a9fff3524farm64: dts: qcom: sdm845-cheza: fix AP suspend pin bias | *6487f48ea3arm64: dts: qcom: sdm630: fix UART1 pin bias | *6c0c9c5458ARM: dts: qcom: apq8064: fix coresight compatible | *0f9b088d68arm64: dts: qcom: msm8996: fix GPU OPP table | *270683fc7barm64: dts: qcom: msm8996: fix supported-hw in cpufreq OPP tables | *5c5a628914arm64: dts: qcom: msm8996: Add MSM8996 Pro support | *3f14048ee4arm64: dts: qcom: sm8250-sony-xperia-edo: fix touchscreen bias-disable | *89f79f8d7farm64: dts: qcom: ipq6018-cp01-c1: use BLSPI1 pins | *9db5992e72usb: musb: remove extra check in musb_gadget_vbus_draw | *adc063a491drm/amd/display: Manually adjust strobe for DCN303 * |50e12445abMerge 5.15.85 into android13-5.15-lts |\| | *5827ddaf45Linux 5.15.85 | *e22dbadac8net: loopback: use NET_NAME_PREDICTABLE 
for name_assign_type | *314e7a7836selftests: net: Use "grep -E" instead of "egrep" | *19a7814396Bluetooth: L2CAP: Fix u8 overflow | *f692abf139HID: uclogic: Add HID_QUIRK_HIDINPUT_FORCE quirk | *5325a884e2usb: dwc3: pci: Update PCIe device ID for USB3 controller on CPU sub-system for Raptor Lake | *367e1e3399igb: Initialize mailbox message for VF reset | *a301742b35xhci: Apply XHCI_RESET_TO_DEFAULT quirk to ADL-N | *5e959f0c4cUSB: serial: f81534: fix division by zero on line-speed change | *68fbe268d2USB: serial: f81232: fix division by zero on line-speed change | *3ec7f24b8bUSB: serial: cp210x: add Kamstrup RF sniffer PIDs | *2b092fab23USB: serial: option: add Quectel EM05-G modem | *6b41a35b41usb: gadget: uvc: Prevent buffer overflow in setup handler | *828112571cudf: Fix extending file within last block | *df1a2596c7udf: Do not bother looking for prealloc extents if i_lenExtents matches i_size | *63dbbd8f14udf: Fix preallocation discarding at indirect extent boundary | *79a97f08aeudf: Discard preallocation before extending file with a hole * |fb8d543b61Merge 5.15.84 into android13-5.15-lts |\| | *d68f50bfb0Linux 5.15.84 | *972707bae3net: fec: properly guard irq coalesce setup | *289721fe09ASoC: ops: Correct bounds check for second channel on SX controls | *de0866b94anvme-pci: clear the prp2 field when not used | *8bffa95ac1perf: Fix perf_pending_task() UaF | *825bd2af42ASoC: cs42l51: Correct PGA Volume minimum value | *91582b3a1anet: fec: don't reset irq coalesce settings to defaults on "ip link up" | *c772dab247can: mcba_usb: Fix termination command argument | *aa822de7decan: sja1000: fix size of OCR_MODE_MASK define | *09e08740d7pinctrl: meditatek: Startup with the IRQs disabled | *172a95026flibbpf: Use page size as max_entries when probing ring buffer map | *cf611d7867ASoC: ops: Check bounds for second channel in snd_soc_put_volsw_sx() | *a74b88e170ASoC: fsl_micfil: explicitly clear CHnF flags | *afac1e7d78ASoC: fsl_micfil: explicitly clear software reset bit 
| *9d933af8fenfp: fix use-after-free in area_cache_get() | *e1a4f5880dvfs: fix copy_file_range() averts filesystem freeze protection | *86e28ed25bx86/vdso: Conditionally export __vdso_sgx_enter_enclave() * |bfbd2237c1Merge 5.15.83 into android13-5.15-lts |\| | *fd6d66840bLinux 5.15.83 | *f895511de9io_uring: Fix a null-ptr-deref in io_tctx_exit_cb() | *f435c66d23io_uring: move to separate directory | *d9e1e5d8a7block: move CONFIG_BLOCK guard to top Makefile | *e5c0bc4ff5can: esd_usb: Allow REC and TEC to return to zero | *db6343a5b0s390/qeth: fix use-after-free in hsci | *a56c1cebe4s390/qeth: fix various format strings | *a6dba316c9macsec: add missing attribute validation for offload | *40500f1f47net: mvneta: Fix an out of bounds check | *b9274dbe39net: thunderbolt: fix memory leak in tbnet_open() | *7390c70bd4ipv6: avoid use-after-free in ip6_fragment() | *1beb475892net: plip: don't call kfree_skb/dev_kfree_skb() under spin_lock_irq() | *b08412a9cfnet: phy: mxl-gpy: fix version reporting | *dec5abd91axen/netback: fix build warning | *54d830e242dpaa2-switch: Fix memory leak in dpaa2_switch_acl_entry_add() and dpaa2_switch_acl_entry_remove() | *c7adcbd0fdethernet: aeroflex: fix potential skb leak in greth_init_rings() | *d962d42d63tipc: call tipc_lxc_xmit without holding node_read_lock | *f3b5dda26cnet: dsa: sja1105: fix memory leak in sja1105_setup_devlink_regions() | *5dab6fa068ipv4: Fix incorrect route flushing when table ID 0 is used | *ac566bd577ipv4: Fix incorrect route flushing when source address is deleted | *af4ccae4b7tipc: Fix potential OOB in tipc_link_proto_rcv() | *b8ce0e6f9fnet: hisilicon: Fix potential use-after-free in hix5hd2_rx() | *1685417774net: mdio: fix unbalanced fwnode reference count in mdio_device_release() | *6f4798ac9cnet: hisilicon: Fix potential use-after-free in hisi_femac_rx() | *114e65a221net: thunderx: Fix missing destroy_workqueue of nicvf_rx_mode_wq | *51c0494575net: microchip: sparx5: Fix missing destroy_workqueue of mact_queue | 
*99eec0a766ip_gre: do not report erspan version on GRE interface | *2891957853net: stmmac: fix "snps,axi-config" node property parsing | *5cb8f1a784gpio/rockchip: fix refcount leak in rockchip_gpiolib_register() | *b8c2f0392dnvme initialize core quirks before calling nvme_init_subsystem | *908b2da426NFC: nci: Bounds check struct nfc_target arrays | *d841cc1563i40e: Disallow ip4 and ip6 l4_4_bytes | *625a13850bi40e: Fix for VF MAC address 0 | *5538794dbdi40e: Fix not setting default xps_cpus after reset | *a6b30598fenet: mvneta: Prevent out of bounds read in mvneta_config_rss() | *e6e897d4fexen-netfront: Fix NULL sring after live migration | *eefd8953a7octeontx2-pf: Fix potential memory leak in otx2_init_tc() | *f88acaed07net: mdiobus: fix double put fwnode in the error path | *cc62d76928net: mdiobus: fwnode_mdiobus_register_phy() rework error handling | *ea113b570enet: encx24j600: Fix invalid logic in reading of MISTAT register | *8aae746d06net: encx24j600: Add parentheses to fix precedence | *a110287ef4mac802154: fix missing INIT_LIST_HEAD in ieee802154_if_add() | *e046421bedselftests: rtnetlink: correct xfrm policy rule in kci_test_ipsec_offload | *4fa8988a36net: dsa: sja1105: Check return value | *b35be171dfnet: dsa: hellcreek: Check return value | *a4c342e645net: dsa: ksz: Check return value | *edf7284a98Bluetooth: Fix not cleanup led when bt_init fails | *3322193949Bluetooth: 6LoWPAN: add missing hci_dev_put() in get_l2cap_conn() | *6c88c764e0vmxnet3: use correct intrConf reference when using extended queues | *5ad0d85757vmxnet3: correctly report encapsulated LRO packet | *5c014eb0edaf_unix: Get user_ns from in_skb in unix_diag_get_exact(). 
| *807a01a329drm: bridge: dw_hdmi: fix preference of RGB modes over YUV420 | *eb96fd3983net: broadcom: Add PTP_1588_CLOCK_OPTIONAL dependency for BCMGENET under ARCH_BCM2835 | *16eb678bcaigb: Allocate MSI-X vector when testing | *34c6367c94e1000e: Fix TX dispatch condition | *4271515f18gpio: amd8111: Fix PCI device reference count leak | *d57b60e9b3drm/bridge: ti-sn65dsi86: Fix output polarity setting bug | *f8b2965601netfilter: ctnetlink: fix compilation warning after data race fixes in ct mark | *246bcd05baca8210: Fix crash by zero initializing data | *80dad8df5fieee802154: cc2520: Fix error return code in cc2520_hw_init() | *dd9dcfb85cdrm/vmwgfx: Fix race issue calling pin_user_pages | *7b09ba9036netfilter: nft_set_pipapo: Actually validate intervals in fields after the first one | *6daaa84b62gpiolib: fix memory leak in gpiochip_setup_dev() | *1a1075d371gpiolib: check the 'ngpios' property in core gpiolib code | *70c5515c1cgpiolib: improve coding style for local variables | *3b714f25fcclk: Fix pointer casting to prevent oops in devm_clk_release() | *c142cba37dcan: af_can: fix NULL pointer dereference in can_rcv_filter | *104bb1f67eHID: ite: Enable QUIRK_TOUCHPAD_ON_OFF_REPORT on Acer Aspire Switch V 10 | *f755d11c55HID: core: fix shift-out-of-bounds in hid_report_raw_event | *2d4b310c32HID: hid-lg4ff: Add check for empty lbuf | *5e8021ae08HID: usbhid: Add ALWAYS_POLL quirk for some mice | *5e88c6f4aanet: dsa: sja1105: avoid out of bounds access in sja1105_init_l2_policing() | *1074fefce9drm/shmem-helper: Avoid vm_open error paths | *83e3da8bb9drm/shmem-helper: Remove errant put in error path | *249011f4c3drm/amdgpu/sdma_v4_0: turn off SDMA ring buffer in the s2idle suspend | *1e4fe9a154drm/vmwgfx: Don't use screen objects when SEV is active | *f6550976feKVM: s390: vsie: Fix the initialization of the epoch extension (epdx) field | *fe50a9bbebnet: mana: Fix race on per-CQ variable napi work_done | *a49894a5acBluetooth: Fix crash when replugging CSR fake 
controllers | *1dee2b5047Bluetooth: btusb: Add debug message for CSR controllers | *3ac29732a2mm/gup: fix gup_pud_range() for dax | *aad8bbd17amemcg: fix possible use-after-free in memcg_write_event_control() | *6fb8bc29bfmedia: v4l2-dv-timings.c: fix too strict blanking sanity checks | *a4c575541eRevert "ARM: dts: imx7: Fix NAND controller size-cells" | *28abc11459soundwire: intel: Initialize clock stop timeout | *22d800b378media: videobuf2-core: take mmap_lock in vb2_get_unmapped_area() | *5d0fa6fc88xen/netback: don't call kfree_skb() with interrupts disabled | *4422241cefxen/netback: do some code cleanup | *0fe29bd925xen/netback: Ensure protocol headers don't fall in the non-linear area | *f01677be31drm/bridge: anx7625: Fix edid_read break case in sp_tx_edid_read() | *ee2536830bcifs: fix use-after-free caused by invalid pointer `hostname` | *dc62f05f66rtc: cmos: avoid UIP when reading alarm time | *48ea4199afrtc: cmos: avoid UIP when writing alarm time | *3f52afc6edrtc: mc146818-lib: extract mc146818_avoid_UIP | *1a3f8c6cd2mm/khugepaged: invoke MMU notifiers in shmem/file collapse paths | *79ad784c9dmm/khugepaged: fix GUP-fast interaction by sending IPI | *d15cd6de01mm/khugepaged: take the right locks for page table retraction | *26f084e554net: usb: qmi_wwan: add u-blox 0x1342 composition | *029a7f1c5d9p/xen: check logical size for buffer size | *b398832893usb: dwc3: gadget: Disable GUSB2PHYCFG.SUSPHY for End Transfer | *e70a572440fbcon: Use kzalloc() in fbcon_prepare_logo() | *fd3768597dregulator: twl6030: fix get status of twl6032 regulators | *9f74b9aa8dASoC: soc-pcm: Add NULL check in BE reparenting | *dae93f4168btrfs: send: avoid unaligned encoded writes when attempting to clone range | *f54e1edf57selftests/net: Find nettest in current directory | *fccd454129ALSA: seq: Fix function prototype mismatch in snd_seq_expand_var_event | *542a563bb7regulator: slg51000: Wait after asserting CS pin | *3d1b5fde369p/fd: Use P9_HDRSZ for header size | *fe2d44e86eASoC: 
rt711-sdca: fix the latency time of clock stop prepare state machine transitions | *e945f3d809ARM: dts: rockchip: disable arm_global_timer on rk3066 and rk3188 | *c3b818c91aspi: mediatek: Fix DEVAPC Violation at KO Remove | *d9f0107be1ASoC: wm8962: Wait for updated value of WM8962_CLOCKING1 register | *7ae0262748ARM: 9266/1: mm: fix no-MMU ZERO_PAGE() implementation | *d81c62e312ARM: 9251/1: perf: Fix stacktraces for tracepoint events in THUMB2 kernels | *66717ad03bfs: use acquire ordering in __fget_light() | *1222e2364aARM: dts: rockchip: rk3188: fix lcdc1-rgb24 node name | *996fb29b06arm64: dts: rockchip: fix ir-receiver node names | *752138ef89ARM: dts: rockchip: fix ir-receiver node names | *8045971e40arm: dts: rockchip: remove clock-frequency from rtc | *5e9fb8013aarm: dts: rockchip: fix node name for hym8563 rtc | *2ed7137e91arm64: dts: rockchip: keep I2S1 disabled for GPIO function on ROCK Pi 4 series | *5a1122e1a8mmc: mtk-sd: Fix missing clk_disable_unprepare in msdc_of_clock_parse() | *282f52c954clk: Provide new devm_clk helpers for prepared and enabled clocks | *eb94a7a20fclk: generalize devm_clk_get() a bit * |20de784185ANDROID: fix up abi change in struct sdhci_host * |ebd1f8013dANDROID: gki_defconfig: add CONFIG_FUNCTION_ERROR_INJECTION * |112ff45bb5Merge 5.15.82 into android13-5.15-lts |\| | *d979030136Linux 5.15.82 | *48642f9431proc: proc_skip_spaces() shouldn't think it is working on C strings | *3eb9213f66proc: avoid integer type confusion in get_proc_long | *4a4073a2e2ipc/sem: Fix dangling sem_array access in semtimedop race | *53b9b1201eInput: raydium_ts_i2c - fix memory leak in raydium_i2c_send() | *571b6bbbf5char: tpm: Protect tpm_pm_suspend with locks | *f39891cfe7Revert "clocksource/drivers/riscv: Events are stopped during CPU suspend" | *a759057af7ACPI: HMAT: Fix initiator registration for single-initiator systems | *da8a794d71ACPI: HMAT: remove unnecessary variable initialization | *2d16161a2ci2c: imx: Only DMA messages with I2C_M_DMA_SAFE 
flag set | *950a05cb15i2c: npcm7xx: Fix error handling in npcm_i2c_init() | *db3f8da033serial: stm32: Deassert Transmit Enable on ->rs485_config() | *45f628f4fdserial: stm32: Use TC interrupt to deassert GPIO RTS in RS485 mode | *c60eae5b1dserial: stm32: Factor out GPIO RTS toggling into separate function | *041f8dc882ipv4: Fix route deletion when nexthop info is not specified | *25174d91e4ipv4: Handle attempt to delete multipath route when fib_info contains an nh reference | *a0ad247e55selftests: net: fix nexthop warning cleanup double ip typo | *532847b69cselftests: net: add delete nexthop route warning test | *e078355881Kconfig.debug: provide a little extra FRAME_WARN leeway when KASAN is enabled | *723fa02e0eparisc: Increase FRAME_WARN to 2048 bytes on parisc | *b951ab4b35mm: migrate: fix THP's mapcount on isolation | *c5eda6029cmm: __isolate_lru_page_prepare() in isolate_migratepages_block() | *bdb613ef17iommu/vt-d: Fix PCI device refcount leak in dmar_dev_scope_init() | *b6eea8b2e8iommu/vt-d: Fix PCI device refcount leak in has_external_pci() | *787d81d4ebnvme: fix SRCU protection of nvme_ns_head list | *12f237200criscv: kexec: Fixup irq controller broken in kexec crash path | *ac00301adbriscv: fix race when vmap stack overflow | *fa7a7d185eriscv: Sync efi page table's kernel mappings before switching | *d86d698925pinctrl: single: Fix potential division by zero | *98b15c7066ASoC: ops: Fix bounds check for _sx controls | *f88a6977f8KVM: x86/mmu: Fix race condition in direct_page_fault | *df4b177b48io_uring/poll: fix poll_refs race with cancelation | *4b702b7d11io_uring: make poll refs more robust | *1d58849ac2io_uring: cmpxchg for poll arm refs release | *cd1981a8c3io_uring: fix tw losing poll events | *62321dc7b0io_uring: update res mask in io_poll_check_events | *417d5ea6e7tracing: Free buffers when a used dynamic event is removed | *52fc245d15tracing: Fix race where histograms can be called before the event | *cb2b0612cdtracing/osnoise: Fix duration type | 
*615a996ff3drm/i915: Never return 0 if not all requests retired | *01a2b25ef2drm/i915: Fix negative value passed as remaining time | *ff1591ba33drm/amdgpu: enable Vangogh VCN indirect sram mode | *ac2d7fa908drm/amdgpu: temporarily disable broken Clang builds due to blown stack-frame | *57ee7bc4c6mmc: sdhci: Fix voltage switch delay | *bb8f809514mmc: sdhci-sprd: Fix no reset data and command after voltage switch | *4c7681c1a5mmc: sdhci-esdhc-imx: correct CQHCI exit halt state check | *01dbe4db59mmc: core: Fix ambiguous TRIM and DISCARD arg | *738946e355mmc: mmc_test: Fix removal of debugfs file | *635d051734net: stmmac: Set MAC's flow control register to reflect current settings | *9132dcdf3bv4l2: don't fall back to follow_pfn() if pin_user_pages_fast() fails | *76ad884be0pinctrl: intel: Save and restore pins in "direct IRQ" mode | *41296b85fax86/bugs: Make sure MSR_SPEC_CTRL is updated properly upon resume from S3 | *33021419fdnilfs2: fix NULL pointer dereference in nilfs_palloc_commit_free_entry() | *2e44dd9a8dtools/vm/slabinfo-gnuplot: use "grep -E" instead of "egrep" | *b60a8ad771error-injection: Add prompt for function error injection | *757eb00c4cALSA: dice: fix regression for Lexicon I-ONIX FW810S | *a1a96a6f30riscv: mm: Proper page permissions after initmem free | *823df3607driscv: vdso: fix section overlapping under some conditions | *6e035d5a2ahwmon: (coretemp) fix pci device refcount leak in nv1a_ram_new() | *7692700ac8hwmon: (coretemp) Check for null before removing sysfs attrs | *9b5836b9c4net: ethernet: renesas: ravb: Fix promiscuous mode after system resumed | *0dfb9a5663sctp: fix memory leak in sctp_stream_outq_migrate() | *fcb3e02161packet: do not set TP_STATUS_CSUM_VALID on CHECKSUM_COMPLETE | *04b995e963net: tun: Fix use-after-free in tun_detach() | *43ca0adf79afs: Fix fileserver probe RTT handling | *543d917f69net: mdiobus: fix unbalanced node reference count | *dca370e575net: hsr: Fix potential use-after-free | *1daec08156tipc: re-fetch skb cb 
after tipc_msg_validate | *16a64dc265dsa: lan9303: Correct stat name | *766086ea8cnet: wwan: iosm: fix dma_alloc_coherent incompatible pointer type | *c667751a42net: wwan: iosm: fix kernel test robot reported error | *9c584d6d9cnet: ethernet: nixge: fix NULL dereference | *8782b32ef8net/9p: Fix a potential socket leak in p9_socket_open | *6fc9425bffnet: net_netdev: Fix error handling in ntb_netdev_init_module() | *3bc893ef36net: ethernet: ti: am65-cpsw: fix error handling in am65_cpsw_nuss_probe() | *7730904f50net: phy: fix null-ptr-deref while probe() failed | *59b54f0563wifi: mac8021: fix possible oob access in ieee80211_get_rate_duration | *dc0853f8b5wifi: cfg80211: don't allow multi-BSSID in S1G | *88a6fe3707wifi: cfg80211: fix buffer overflow in elem comparison | *08fff7aaebaquantia: Do not purge addresses when setting the number of rings | *2a7aa52573qlcnic: fix sleep-in-atomic-context bugs caused by msleep | *7b734d26f0can: m_can: Add check for devm_clk_get | *ea8dc27bb0can: m_can: pci: add missing m_can_class_free_dev() in probe/remove methods | *b1d2a8e02acan: etas_es58x: es58x_init_netdev(): free netdev when register_candev() | *e53da04e37can: cc770: cc770_isa_probe(): add missing free_cc770dev() | *d452a71995can: sja1000_isa: sja1000_isa_probe(): add missing free_sja1000dev() | *372eb550fanet/mlx5e: Fix use-after-free when reverting termination table | *839eeab03cnet/mlx5: Fix uninitialized variable bug in outlen_write() | *34feea3bfbnet/mlx5: DR, Fix uninitialized var warning | *3485ef2aabnet/mlx5: DR, Rename list field in matcher struct to list_node | *9fc27d22cde100: Fix possible use after free in e100_xmit_prepare | *0d9f5bd54biavf: Fix error handling in iavf_init_module() | *b0b2b9050ciavf: remove redundant ret variable | *69501d8205fm10k: Fix error handling in fm10k_init_module() | *5e3657dedei40e: Fix error handling in i40e_init_module() | *7109e94109ixgbevf: Fix resource leak in ixgbevf_init_module() | *196ea810e2of: property: decrement node 
refcount in of_fwnode_get_reference_args() | *36164db278nvmem: rmem: Fix return value check in rmem_read() | *e376183167bpf: Do not copy spin lock field from user in bpf_selem_alloc | *45f6e81863hwmon: (ibmpex) Fix possible UAF when ibmpex_register_bmc() fails | *a90251376chwmon: (i5500_temp) fix missing pci_disable_device() | *eeb31b828dhwmon: (ina3221) Fix shunt sum critical calculation | *9514b95cachwmon: (ltc2947) fix temperature scaling | *0140e079a4libbpf: Handle size overflow for ringbuf mmap | *06d5790e7dARM: at91: rm9200: fix usb device clock id | *d074f173fbscripts/faddr2line: Fix regression in name resolution on ppc64le | *ee3d37d796bpf, perf: Use subprog name when reporting subprog ksymbol | *ec02fc0a41iio: light: rpr0521: add missing Kconfig dependencies | *f7419fc42aiio: health:afe4404: Fix oob read in afe4404_[read|write]_raw | *e7e76a77aaiio: health: afe4403: Fix oob read in afe4403_read_raw | *ebdca90efbdrm/amdgpu: Partially revert "drm/amdgpu: update drm_display_info correctly when the edid is read" | *c365d3c3e5drm/amdgpu: update drm_display_info correctly when the edid is read | *df5346466edrm/display/dp_mst: Fix drm_dp_mst_add_affected_dsc_crtcs() return code | *044da1a371btrfs: qgroup: fix sleep from invalid context bug in btrfs_qgroup_inherit() | *da86809ab8btrfs: move QUOTA_ENABLED check to rescan_should_stop from btrfs_qgroup_rescan_worker | *5d66eadc1cspi: spi-imx: Fix spi_bus_clk if requested clock is higher than input clock | *6b4544a131btrfs: free btrfs_path before copying inodes to userspace | *c7ae3beceebtrfs: sink iterator parameter to btrfs_ioctl_logical_to_ino | *acc2f40b98erofs: fix order >= MAX_ORDER warning due to crafted negative i_size | *ca9f27448adrm/i915/gt: Use i915_vm_put on ppgtt_create error paths | *c2f2972889drm/i915: Create a dummy object for gen6 ppgtt | *918002bdbearm64: mte: Avoid setting PG_mte_tagged if no tags cleared or restored * |d753150bdcRevert "serial: Add rs485_supported to uart_port" * |8ccd9528beRevert 
"serial: fsl_lpuart: Fill in rs485_supported" * |a924bb92c6Merge 5.15.81 into android13-5.15-lts |\| | *e4a7232c91Linux 5.15.81 | *5c5c563a08cifs: fix missed refcounting of ipc tcon | *ee2d04f23bdrm/i915: fix TLB invalidation for Gen12 video and compute engines | *bef834845ddrm/amdgpu: always register an MMU notifier for userptr | *7901de7aa8drm/amdgpu: Enable Aldebaran devices to report CU Occupancy | *e7bf1fe538drm/amd/display: No display after resume from WB/CB | *5033cba00cdrm/amd/dc/dce120: Fix audio register mapping, stop triggering KASAN | *b8dc245909btrfs: sysfs: normalize the error handling branch in btrfs_init_sysfs() | *914baca57abtrfs: use kvcalloc in btrfs_get_dev_zone_info | *c1e6d4bfdebtrfs: zoned: fix missing endianness conversion in sb_write_pointer | *d88bf6be02btrfs: free btrfs_path before copying subvol info to userspace | *f218b404fcbtrfs: free btrfs_path before copying fspath to userspace | *fea9397101btrfs: free btrfs_path before copying root refs to userspace | *7d0c25b5fegenirq: Take the proposed affinity at face value if force==true | *f17657cce0irqchip/gic-v3: Always trust the managed affinity provided by the core code | *52a93f2dcfgenirq: Always limit the affinity to online CPUs | *599cf4b845genirq/msi: Shutdown managed interrupts with unsatifiable affinities | *7aed1dd5d2wifi: wilc1000: validate number of channels | *e9de501cf7wifi: wilc1000: validate length of IEEE80211_P2P_ATTR_CHANNEL_LIST attribute | *143232cb5awifi: wilc1000: validate length of IEEE80211_P2P_ATTR_OPER_CHANNEL attribute | *cd9c486971wifi: wilc1000: validate pairwise and authentication suite offsets | *f2fb18d429fuse: lock inode unconditionally in fuse_fallocate() | *bb1c33bdf4dm integrity: clear the journal on suspend | *20ad31b09edm integrity: flush the journal on suspend | *5ca2110ba5gpu: host1x: Avoid trying to use GART on Tegra20 | *97f47617e8scsi: iscsi: Fix possible memory leak when device_register() failed | *56ab7f237enet: usb: qmi_wwan: add Telit 0x103a 
composition | *e2e33f213dtcp: configurable source port perturb table size | *269928e5c7platform/x86: ideapad-laptop: Fix interrupt storm on fn-lock toggle on some Yoga laptops | *17d995dc69platform/x86: hp-wmi: Ignore Smart Experience App event | *e85bdc7872zonefs: fix zone report size in __zonefs_io_error() | *982fcd83fbdrm/amdgpu: disable BACO support on more cards | *ea11f8197dplatform/x86: acer-wmi: Enable SW_TABLET_MODE on Switch V 10 (SW5-017) | *09af15e691platform/x86: asus-wmi: add missing pci_dev_put() in asus_wmi_set_xusb2pr() | *ba040bea9dxen/platform-pci: add missing free_irq() in error path | *6815b2087dxen-pciback: Allow setting PCI_MSIX_FLAGS_MASKALL too | *4c13ddb74fASoC: stm32: dfsdm: manage cb buffers cleanup | *dd82295a23Input: i8042 - apply probe defer to more ASUS ZenBook models | *e12e121febInput: soc_button_array - add Acer Switch V 10 to dmi_use_low_level_irq[] | *9f5c167074Input: soc_button_array - add use_low_level_irq module parameter | *aaef86eac9Input: goodix - try resetting the controller when no config is set | *e2223f5fbbserial: 8250: 8250_omap: Avoid RS485 RTS glitch on ->set_termios() | *4e208294detools: iio: iio_generic_buffer: Fix read size | *0d0e2545faASoC: Intel: bytcht_es8316: Add quirk for the Nanote UMPC-01 | *e394cf9d7aInput: synaptics - switch touchpad on HP Laptop 15-da3001TU to RMI mode | *96b5d11777x86/ioremap: Fix page aligned size calculation in __ioremap_caller() | *d048f74815x86/pm: Add enumeration check before spec MSRs save/restore setup | *070e3560bfx86/tsx: Add a feature bit for TSX control MSR support | *1430c98ebbKVM: x86: remove exit_int_info warning in svm_handle_exit | *27550a5930KVM: x86: add kvm_leave_nested | *3e87cb0caaKVM: x86: nSVM: harden svm_free_nested against freeing vmcb02 while still in use | *6425c590d0KVM: x86: forcibly leave nested mode on vCPU reset | *f42ebf972aKVM: x86: nSVM: leave nested mode on vCPU free | *7b3c9405b2mm: vmscan: fix extreme overreclaim and swap floods | *feb2eda5e1gcov: 
clang: fix the buffer overflow issue | *ea6aa25c9anilfs2: fix nilfs_sufile_mark_dirty() not set segment usage as dirty | *9d97a9fbfcusb: dwc3: gadget: Clear ep descriptor last | *02632ea4dfusb: dwc3: gadget: Return -ESHUTDOWN on ep disable | *765ca3e63fusb: dwc3: gadget: conditionally remove requests | *7945cbf866bus: ixp4xx: Don't touch bit 7 on IXP42x | *39c039018aiio: core: Fix entry not deleted when iio_register_sw_trigger_type() fails | *0791a5ddbaiio: light: apds9960: fix wrong register for gesture gain | *f0158b9bfcarm64: dts: rockchip: lower rk3399-puma-haikou SD controller clock frequency | *277d19ec28ext4: fix use-after-free in ext4_ext_shift_extents | *c9d133100busb: cdnsp: fix issue with ZLP - added TD_SIZE = 1 | *c2ad434cd4usb: cdnsp: Fix issue with Clear Feature Halt Endpoint | *1d91c64887usb: dwc3: exynos: Fix remove() function | *0a216625c3KVM: arm64: pkvm: Fixup boot mode to reflect that the kernel resumes from EL1 | *f0044a4a31mmc: sdhci-brcmstb: Fix SDHCI_RESET_ALL for CQHCI | *8e6940979bmmc: sdhci-brcmstb: Enable Clock Gating to save power | *24b46bfa96mmc: sdhci-brcmstb: Re-organize flags | *227543ccacnios2: add FORCE for vmlinuz.gz | *6a4ea16a67init/Kconfig: fix CC_HAS_ASM_GOTO_TIED_OUTPUT test with dash | *c4a9046c27lib/vdso: use "grep -E" instead of "egrep" | *5fefdceafbs390/crashdump: fix TOD programmable field size | *592b6fd74anet: thunderx: Fix the ACPI memory leak | *697eb30a35octeontx2-af: Fix reference count issue in rvu_sdp_init() | *6ba1687ea1octeontx2-pf: Add check for devm_kcalloc | *26c31e7c73net: enetc: preserve TX ring priority across reconfiguration | *0e16bbf616net: enetc: cache accesses to &priv->si->hw | *68de40f66anet: enetc: manage ENETC_F_QBV in priv->active_offloads only when enabled | *5c0858e142nfc: st-nci: fix incorrect sizing calculations in EVT_TRANSACTION | *e09243fb16nfc: st-nci: fix memory leaks in EVT_TRANSACTION | *dca20b7a19nfc: st-nci: fix incorrect validating logic in EVT_TRANSACTION | *67d638f8efs390/dasd: 
fix no record found for raw_track_access | *88277853cfarcnet: fix potential memory leak in com20020_probe() | *1d44ec8507ipv4: Fix error return code in fib_table_insert() | *918e83c6bfdccp/tcp: Reset saddr on failure after inet6?_hash_connect(). | *8ce9b1c97ffs: do not update freeing inode i_io_list | *8db9e60cdfnetfilter: flowtable_offload: add missing locking | *c1da3bfca1netfilter: ipset: restore allowing 64 clashing elements in hash:net,iface | *606091b2f6dma-buf: fix racing conflict of dma_heap_add() | *8af9450befbnx2x: fix pci device refcount leak in bnx2x_vf_is_pcie_pending() | *251bcf6cfbregulator: twl6030: re-add TWL6032_SUBCLASS | *6258a8f913NFC: nci: fix memory leak in nci_rx_data_packet() | *ffe6021154net: sched: allow act_ct to be built without NF_NAT | *a05c0f9511net: sparx5: fix error handling in sparx5_port_open() | *182ef20f0fsfc: fix potential memleak in __ef100_hard_start_xmit() | *2da022fac9net: wwan: iosm: use ACPI_FREE() but not kfree() in ipc_pcie_read_bios_cfg() | *a48b345b87xfrm: Fix ignored return value in xfrm6_init() | *19989e1635xfrm: Fix oops in __xfrm_state_delete() | *46d450067ftipc: check skb_linearize() return value in tipc_disc_rcv() | *33fb115a76tipc: add an extra conn_get in tipc_conn_alloc | *4ae907c45ftipc: set con sock in tipc_conn_alloc | *ef866d9ea9net/mlx5: Fix handling of entry refcount when command is not issued to FW | *3101318939net/mlx5: Fix FW tracer timestamp calculation | *1eaabb5bbbnet/mlx5: Do not query pci info while pci disabled | *8180099b2anetfilter: ipset: regression in ip_set_hash_ip.c | *448b627370Drivers: hv: vmbus: fix possible memory leak in vmbus_device_register() | *082c31cb99Drivers: hv: vmbus: fix double free in the error path of vmbus_add_channel_work() | *7fdd9daa5bmacsec: Fix invalid error code set | *e8fb93a079nfp: add port from netdev validation for EEPROM access | *e44e424ed9nfp: fill splittable of devlink_port_attrs correctly | *527046c138net: pch_gbe: fix pci device refcount leak while 
module exiting | *f77c84dd5bocteontx2-af: debugsfs: fix pci device refcount leak | *cd581ffd8dnet/qla3xxx: fix potential memleak in ql3xxx_send() | *a8976074e2net: mvpp2: fix possible invalid pointer dereference | *3a4cc56cd1net/mlx4: Check retval of mlx4_bitmap_init | *c368220e17net: ethernet: mtk_eth_soc: fix error handling in mtk_open() | *d9729437b2ARM: dts: imx6q-prti6q: Fix ref/tcxo-clock-frequency properties | *1c0b6a97c4ARM: mxs: fix memory leak in mxs_machine_init() | *ecff08f3c4iavf: Fix race condition between iavf_shutdown and iavf_remove | *31147d4e90iavf: Do not restart Tx queues after reset task failure | *232942b26ciavf: Fix a crash during reset task | *0600615d01netfilter: nf_tables: do not set up extensions for end interval | *60387731e6netfilter: conntrack: Fix data-races around ct mark | *ee3ccd1abb9p/fd: fix issue of list_del corruption in p9_fd_cancel() | *131c2eeabcnet: pch_gbe: fix potential memleak in pch_gbe_tx_queue() | *f58df483ffnfc/nci: fix race with opening and closing | *da22d7410anet: dsa: sja1105: disallow C45 transactions on the BASE-TX MDIO bus | *38fe0988bdrxrpc: Fix race between conn bundle lookup and bundle removal [ZDI-CAN-15975] | *d92151b465rxrpc: Use refcount_t rather than atomic_t | *3c33e41fa5rxrpc: Allow list of in-use local UDP endpoints to be viewed in /proc | *46cefa2689net: liquidio: simplify if expression | *95500ee0b3selftests: mptcp: fix mibit vs mbit mix up | *f8c4da198eselftests: mptcp: more stable simult_flows tests | *1c0efab08cARM: dts: at91: sam9g20ek: enable udc vbus gpio pinctrl | *ade662f3f2tee: optee: fix possible memory leak in optee_register_device() | *d1dd119134bus: sunxi-rsb: Support atomic transfers | *b1ed61e706bus: sunxi-rsb: Remove the shutdown callback | *61a41d1abcregulator: core: fix UAF in destroy_regulator() | *a85c0db3f5spi: dw-dma: decrease reference count in dw_spi_dma_init_mfld() | *d9f9b3255bregulator: core: fix kobject release warning and memory leak in regulator_register() | 
*bd419c7c68ASoC: max98373: Add checks for devm_kcalloc | *f9bc4a18e7scsi: storvsc: Fix handling of srb_status and capacity change events | *c2153fe2d0x86/hyperv: Restore VP assist page after cpu offlining/onlining | *b2ddd76237ASoC: soc-pcm: Don't zero TDM masks in __soc_pcm_open() | *dd62cb7e6fASoC: sgtl5000: Reset the CHIP_CLK_CTRL reg on remove | *d80ffd4823ASoC: hdac_hda: fix hda pcm buffer overflow issue | *10bee7eb2aARM: dts: am335x-pcm-953: Define fixed regulators in root node | *8fe533c0f9af_key: Fix send_acquire race with pfkey_register | *0c69a4658exfrm: replay: Fix ESN wrap around for GSO | *ecc6ce4fdfxfrm: fix "disable_policy" on ipv4 early demux | *5a792c1d4dMIPS: pic32: treat port as signed integer | *144452b421RISC-V: vdso: Do not add missing symbols to version section in linker script | *799970b8ccALSA: usb-audio: add quirk to fix Hamedal C20 disconnect issue | *38b09dc14fRevert "drm/amdgpu: Revert "drm/amdgpu: getting fan speed pwm for vega10 properly"" | *44d50fccf8nvmet: fix memory leak in nvmet_subsys_attr_model_store_locked | *5adc12d9e2arm64/syscall: Include asm/ptrace.h in syscall_wrapper header. 
| *1340f02773block, bfq: fix null pointer dereference in bfq_bio_bfqg() | *86d4dca4a6drm: panel-orientation-quirks: Add quirk for Acer Switch V 10 (SW5-017) | *b90e6234f5scsi: scsi_debug: Make the READ CAPACITY response compliant with ZBC | *cdbba6a4descsi: ibmvfc: Avoid path failures during live migration | *6e8124a151platform/x86/intel/hid: Add some ACPI device IDs | *32735e24f4platform/x86/intel/pmt: Sapphire Rapids PMT errata fix | *83a6823016platform/x86: touchscreen_dmi: Add info for the RCA Cambio W101 v2 2-in-1 | *f707986a14platform/x86: ideapad-laptop: Disable touchpad_switch | *5e38740ae5Revert "net: macsec: report real_dev features when HW offloading is enabled" | *26b72202eeselftests/bpf: Add verifier test for release_reference() | *8395e3f98cspi: stm32: fix stm32_spi_prepare_mbr() that halves spi clk for every run | *d04722f280wifi: ath11k: Fix QCN9074 firmware boot on x86 | *9cc96a20a9wifi: mac80211: Fix ack frame idr leak when mesh has no route | *86f90014e7wifi: airo: do not assign -1 to unsigned char | *f5558fbda0audit: fix undefined behavior in bit shift for AUDIT_BIT | *af5de982ffriscv: dts: sifive unleashed: Add PWM controlled LEDs | *ee34a19dbewifi: mac80211_hwsim: fix debugfs attribute ps with rc table support | *3513785dc1wifi: mac80211: fix memory free error when registering wiphy fail | *855485d31eceph: fix NULL pointer dereference for req->r_session | *729c9ad294ceph: Use kcalloc for allocating multiple elements | *d276fb4a7ebinder: validate alloc->mm in ->mmap() handler | *5277e3d633x86/sgx: Add overflow check in sgx_validate_offset_length() | *b5a838ba47x86/sgx: Create utility to validate user provided offset and length | *2f6e2de3a5ceph: avoid putting the realm twice when decoding snaps fails | *8bef55d793ceph: do not update snapshot context when there is no new snapshot | *cdee3136c9iio: pressure: ms5611: fixed value compensation bug | *5d6696e79diio: ms5611: Simplify IO callback parameters | *f0ee88e83cnvme-pci: add 
NVME_QUIRK_BOGUS_NID for Netac NV7000 | *a61716cd24nvme-pci: disable write zeroes on various Kingston SSD | *19b60f3363nvme-pci: disable namespace identifiers for the MAXIO MAP1001 | *d537e19306nvme-pci: add NVME_QUIRK_BOGUS_NID for Micron Nitro | *af03ce894cnvme: add a bogus subsystem NQN quirk for Micron MTFDKBA2T0TFH | *c6803faa6adrm/display: Don't assume dual mode adaptors support i2c sub-addressing | *d2284fe43cata: libata-core: do not issue non-internal commands once EH is pending | *e09583e83eata: libata-scsi: simplify __ata_scsi_queuecmd() | *a9059e338fcifs: Fix connections leak when tlink setup failed | *81d583baa5cifs: support nested dfs links over reconnect | *dbc0ea91becifs: split out dfs code from cifs_reconnect() | *b3ce844d23cifs: introduce new helper for cifs_reconnect() | *2ea600b598sctp: clear out_curr if all frag chunks of current msg are pruned | *1f9f346fbbsctp: remove the unnecessary sinfo_stream check in sctp_prsctp_prune_unsent | *e8915faa9ftty: serial: fsl_lpuart: don't break the on-going transfer when global reset | *bd19013935serial: fsl_lpuart: Fill in rs485_supported | *87c81c19cdserial: Add rs485_supported to uart_port | *c08f4ea79fASoC: fsl_asrc fsl_esai fsl_sai: allow CONFIG_PM=N | *d1e4288d2aASoC: fsl_sai: use local device pointer * |e66b45d527Merge branch 'android13-5.15' into android13-5.15-lts * |72d681a01dRevert "net: use struct_group to copy ip/ipv6 header addresses" * |c46ed1b2d7Merge 5.15.80 into android13-5.15-lts |\| | *71e496bd33Linux 5.15.80 | *b63ddb3ba6ntfs: check overflow when iterating ATTR_RECORDs | *ab6a1bb17entfs: fix out-of-bounds read in ntfs_attr_find() | *5330c423b8ntfs: fix use-after-free in ntfs_attr_find() | *43bbadb7e4net/9p: use a dedicated spinlock for trans_fd | *9357fca9damm: fs: initialize fsdata passed to write_begin/write_end interface | *b334ab4c33wifi: wext: use flex array destination for memcpy() | *0e07032b4b9p/trans_fd: always use O_NONBLOCK read/write | *7c7b7476b5gfs2: Switch from strlcpy to 
strscpy | *28275a7c84gfs2: Check sb_bsize_shift after reading superblock | *a4f1a01b2e9p: trans_fd/p9_conn_cancel: drop client lock earlier | *f7b0e95071kcm: close race conditions on sk_receive_queue | *27d706b0d3kcm: avoid potential race in kcm_tx_work | *b49026d9c8tcp: cdg: allow tcp_cdg_release() to be called multiple times | *e41cbf98dfmacvlan: enforce a consistent minimal mtu | *d5f7f6e63fInput: i8042 - fix leaking of platform device on module removal | *c49cc2c059kprobes: Skip clearing aggrprobe's post_handler in kprobe-on-ftrace case | *71beab7119scsi: scsi_debug: Fix possible UAF in sdebug_add_host_helper() | *a636772988scsi: target: tcm_loop: Fix possible name leak in tcm_loop_setup_hba_bus() | *cb7893c85enet: use struct_group to copy ip/ipv6 header addresses | *9b8c0c88f4tracing: Fix warning on variable 'struct trace_array' | *73cf0ff9a3ring-buffer: Include dropped pages in counting dirty patches | *35c60b4e8cperf: Improve missing SIGTRAP checking | *2ac6276864serial: 8250_lpss: Use 16B DMA burst with Elkhart Lake | *b1a27b2aadnvme: ensure subsystem reset is single threaded | *bccece3c33nvme: restrict management ioctls to admin | *8cddb0d96bperf/x86/intel/pt: Fix sampling using single range output | *8e2f33c598misc/vmw_vmci: fix an infoleak in vmci_host_do_receive_datagram() | *9a72a46cb0docs: update mediator contact information in CoC doc | *a99a547658mmc: sdhci-pci: Fix possible memory leak caused by missing pci_dev_put() | *4a1b6f7839mmc: sdhci-pci-o2micro: fix card detect fail issue caused by CD# debounce timeout | *fd285d4215mmc: core: properly select voltage range without power cycle | *8a9bae5f1bfirmware: coreboot: Register bus in module init | *052d0e79efiommu/vt-d: Set SRE bit only when hardware has SRS cap | *c31a792a82iommu/vt-d: Preset Access bit for IOVA in FL non-leaf paging entries | *11edbdee43scsi: zfcp: Fix double free of FSF request when qdio send fails | *fdf87b5b30net: phy: marvell: add sleep time after enabling the loopback bit | 
*9648d760edmaccess: Fix writing offset in case of fault in strncpy_from_kernel_nofault() | *fdd57c20d4Input: iforce - invert valid length check when fetching device IDs | *0cafb719beserial: 8250_lpss: Configure DMA also w/o DMA filter | *59f6596697serial: 8250: Flush DMA Rx on RLSI | *118b52c2aeserial: 8250: Fall back to non-DMA Rx if IIR_RDI occurs | *6ffce7a92edm ioctl: fix misbehavior if list_versions races with module loading | *2b104973f7iio: pressure: ms5611: changed hardcoded SPI speed to value limited | *1678d4abb2iio: adc: mp2629: fix potential array out of bound access | *bd22c232eaiio: adc: mp2629: fix wrong comparison of channel | *656f670613iio: trigger: sysfs: fix possible memory leak in iio_sysfs_trig_init() | *1bf8c0aff8iio: adc: at91_adc: fix possible memory leak in at91_adc_allocate_trigger() | *afc0aea702usb: typec: mux: Enter safe mode only when pins need to be reconfigured | *8236628a54usb: cdns3: host: fix endless superspeed hub port reset | *ead83b0db8usb: chipidea: fix deadlock in ci_otg_del_timer | *cc9e6d8c55usb: add NO_LPM quirk for Realforce 87U Keyboard | *70eca1d261USB: serial: option: add Fibocom FM160 0x0111 composition | *1b6a54885cUSB: serial: option: add u-blox LARA-L6 modem | *b0467d0059USB: serial: option: add u-blox LARA-R6 00B modem | *95688a8a57USB: serial: option: remove old LARA-R6 PID | *53dee78ea3USB: serial: option: add Sierra Wireless EM9191 | *e7764e88e6USB: bcma: Make GPIO explicitly optional | *a190a83db2speakup: fix a segfault caused by switching consoles | *b3c6edbee4slimbus: stream: correct presence rate frequencies | *6b35ac8315slimbus: qcom-ngd: Fix build error when CONFIG_SLIM_QCOM_NGD_CTRL=y && CONFIG_QCOM_RPROC_COMMON=m | *0f847462feRevert "usb: dwc3: disable USB core PHY management" | *23ad214a86ALSA: hda/realtek: Fix the speaker output on Samsung Galaxy Book Pro 360 | *a36b505749ALSA: hda/realtek: fix speakers for Samsung Galaxy Book Pro | *02b94885b2ALSA: usb-audio: Drop snd_BUG_ON() from 
snd_usbmidi_output_open() | *7176d6f3addrm/amd/display: Add HUBP surface flip interrupt handler | *e57daa7503tracing: kprobe: Fix potential null-ptr-deref on trace_array in kprobe_event_gen_test_exit() | *3a41c0f2a5tracing: kprobe: Fix potential null-ptr-deref on trace_event_file in kprobe_event_gen_test_exit() | *7291dec4f2tracing: Fix race where eprobes can be called before the event | *6517b97134tracing: Fix wild-memory-access in register_synth_event() | *07ba4f0603tracing: Fix memory leak in test_gen_synth_cmd() and test_empty_synth_event() | *8b318f3032tracing/ring-buffer: Have polling block on watermark | *2c21ee020ctracing: Fix memory leak in tracing_read_pipe() | *00f74b1a98ring_buffer: Do not deactivate non-existant pages | *1bea037a1aftrace: Fix null pointer dereference in ftrace_add_mod() | *fadfcf39fbftrace: Optimize the allocation for mcount entries | *5c5f264289ftrace: Fix the possible incorrect kernel message | *2ab2494162cifs: add check for returning value of SMB2_set_info_init | *5783abda58net: thunderbolt: Fix error handling in tbnet_init() | *80e590aeb1net: microchip: sparx5: Fix potential null-ptr-deref in sparx_stats_init() and sparx5_start() | *4a55aec142cifs: Fix wrong return value checking when GETFLAGS | *c8baf1fc24net/x25: Fix skb leak in x25_lapb_receive_frame() | *af4b57fa6bnet: ag71xx: call phylink_disconnect_phy if ag71xx_hw_enable() fail in ag71xx_open() | *61404a182ecifs: add check for returning value of SMB2_close_init | *d3233f4bf3platform/surface: aggregator: Do not check for repeated unsequenced packets | *6969171403platform/x86/intel: pmc: Don't unconditionally attach Intel PMC when virtualized | *7d93417d59drbd: use after free in drbd_create_device() | *fc16a2c81abridge: switchdev: Fix memory leaks when changing VLAN protocol | *3d90a668c4net: hns3: fix setting incorrect phy link ksettings for firmware in resetting process | *3f7b2ef8fenet: ena: Fix error handling in ena_init() | *2540eea1bdnet: ionic: Fix error handling in 
ionic_init_module() | *c08c13cb13xen/pcpu: fix possible memory leak in register_pcpu() | *97009f07f2net: dsa: make dsa_master_ioctl() see through port_hwtstamp_get() shims | *88da008e5enet: mhi: Fix memory leak in mhi_net_dellink() | *8f839715d0bnxt_en: Remove debugfs when pci_register_driver failed | *b88713d92bnet: caif: fix double disconnect client in chnl_net_open() | *6d24034160net: macvlan: Use built-in RCU list checking | *596230471dmISDN: fix misuse of put_device() in mISDN_register_device() | *07a6a8cf17net: liquidio: release resources when liquidio driver open failed | *19feb6cf41soc: imx8m: Enable OCOTP clock before reading the register | *8c54d706d8net: stmmac: ensure tx function is not running in stmmac_xdp_release() | *6219f46c2bnet: hinic: Fix error handling in hinic_module_init() | *7a05e39296mISDN: fix possible memory leak in mISDN_dsp_element_register() | *0ee6455c9cnet: bgmac: Drop free_netdev() from bgmac_enet_remove() | *7ff4fa179ebpf: Initialize same number of free nodes for each pcpu_freelist | *12f178cf05MIPS: Loongson64: Add WARN_ON on kexec related kmalloc failed | *a4d6e024beMIPS: fix duplicate definitions for exported symbols | *44142b652anfp: change eeprom length to max length enumerators | *f23058dc23ata: libata-transport: fix error handling in ata_tdev_add() | *67b2193146ata: libata-transport: fix error handling in ata_tlink_add() | *e7bb1b7a7bata: libata-transport: fix error handling in ata_tport_add() | *377ff82c33ata: libata-transport: fix double ata_host_put() in ata_tport_add() | *494df0b0efarm64: dts: imx8mn: Fix NAND controller size-cells | *7178d568f7arm64: dts: imx8mm: Fix NAND controller size-cells | *8ccf18c82aARM: dts: imx7: Fix NAND controller size-cells | *e884a6c2d4drm: Fix potential null-ptr-deref in drm_vblank_destroy_worker() | *07e56de876drm/drv: Fix potential memory leak in drm_dev_init() | *45c300613bdrm/panel: simple: set bpc field for logic technologies displays | *779f3f9e0cdrm/vc4: kms: Fix IS_ERR() vs NULL 
check for vc4_kms | *97e5b508e9pinctrl: devicetree: fix null pointer dereferencing in pinctrl_dt_to_map | *9a77b8557fparport_pc: Avoid FIFO port location truncation | *5d03c2911csiox: fix possible memory leak in siox_device_add() | *530e987a02arm64: Fix bit-shifting UB in the MIDR_CPU_MODEL() macro | *d494449782bpf: Fix memory leaks in __check_func_call | *25521fd2e2block: sed-opal: kmalloc the cmd/resp buffers | *2f21d653c6scsi: scsi_transport_sas: Fix error handling in sas_phy_add() | *7cd28bc410pinctrl: rockchip: list all pins in a possible mux route for PX30 | *ab79b8dbe2ASoC: soc-utils: Remove __exit for snd_soc_util_exit() | *eaa8edd865bpf, test_run: Fix alignment problem in bpf_prog_test_run_skb() | *33cabe04d2tty: n_gsm: fix sleep-in-atomic-context bug in gsm_control_send | *ae22294e21serial: imx: Add missing .thaw_noirq hook | *26db1cd519serial: 8250: omap: Flush PM QOS work on remove | *e0db709a58serial: 8250: omap: Fix unpaired pm_runtime_put_sync() in omap8250_remove() | *83b6d4d6daserial: 8250_omap: remove wait loop from Errata i202 workaround | *76db05ab70serial: 8250: omap: Fix missing PM runtime calls for omap8250_set_mctrl() | *2aee616a6bARM: at91: pm: avoid soft resetting AC DLL | *188546c780ASoC: tas2764: Fix set_tdm_slot in case of single slot | *5782896dafASoC: tas2770: Fix set_tdm_slot in case of single slot | *34eee4189bASoC: core: Fix use-after-free in snd_soc_exit() | *aa6f8aecbbARM: dts: at91: sama7g5: fix signal name of pin PB2 | *487fff700fspi: stm32: Print summary 'callbacks suppressed' message | *2cec2f65c1arm64: dts: qcom: sm8350-hdk: Specify which LDO modes are allowed | *44dbe66bb3arm64: dts: qcom: sm8250-xperia-edo: Specify which LDO modes are allowed | *8b2eae7defarm64: dts: qcom: sm8150-xperia-kumano: Specify which LDO modes are allowed | *c8e76eeea7arm64: dts: qcom: sa8155p-adp: Specify which LDO modes are allowed | *30571f28bbhugetlbfs: don't delete error page from pagecache | *14ddbb83c3KVM: x86/pmu: Do not speculatively query 
Intel GP PMCs that don't exist yet | *a9b964ed7cspi: intel: Use correct mask for flash and protected regions | *f4eb68642emtd: spi-nor: intel-spi: Disable write protection only if asked | *156d0c823cASoC: codecs: jz4725b: Fix spelling mistake "Sourc" -> "Source", "Routee" -> "Route" | *5907ff9f2cx86/cpu: Add several Intel server CPU model numbers | *41e37d04e3Bluetooth: L2CAP: Fix l2cap_global_chan_by_psm | *b02a025dd1btrfs: remove pointless and double ulist frees in error paths of qgroup tests | *1c366c206fdrm/imx: imx-tve: Fix return type of imx_tve_connector_mode_valid | *1c8ded1b38i2c: i801: add lis3lv02d's I2C address for Vostro 5568 | *b432581f19i2c: tegra: Allocate DMA memory for DMA engine | *7b0ae4c7b9firmware: arm_scmi: Cleanup the core driver removal callback | *1a8a2fef27ACPI: x86: Add another system to quirk list for forcing StorageD3Enable | *8a03a4a5cfNFSv4: Retry LOCK on OLD_STATEID during delegation return | *49ca2227c4btrfs: raid56: properly handle the error when unable to find the missing stripe | *0f7bd3a2dfRDMA/efa: Add EFA 0xefa2 PCI ID | *a42d4363e7ACPI: scan: Add LATT2021 to acpi_ignore_dep_ids[] | *004decd41bdrm/amd/display: Remove wrong pipe control lock | *7779efbb99ASoC: rt1308-sdw: add the default value of some registers | *ef1e4ed858selftests/intel_pstate: fix build for ARCH=x86_64 | *dfd3cc1ef3selftests/futex: fix build for clang | *648467236cASoC: Intel: sof_sdw: add quirk variant for LAPBC710 NUC15 | *64ee750c29ASoC: codecs: jz4725b: fix capture selector naming | *150b74cd06ASoC: codecs: jz4725b: use right control for Capture Volume | *5352d8b315ASoC: codecs: jz4725b: fix reported volume for Master ctl | *85134577a7ASoC: codecs: jz4725b: add missed Line In power control bit | *5e61dffb16spi: intel: Fix the offset to get the 64K erase opcode | *c697cb2e66ASoC: wm8962: Add an event handler for TEMP_HP and TEMP_SPK | *569085124dASoC: rt1019: Fix the TDM settings | *4160a515c7ASoC: mt6660: Keep the pm_runtime enables before component 
stuff in mt6660_i2c_probe | *2963ec4535ASoC: wm8997: Revert "ASoC: wm8997: Fix PM disable depth imbalance in wm8997_probe" | *30a2f9479cASoC: wm5110: Revert "ASoC: wm5110: Fix PM disable depth imbalance in wm5110_probe" | *3bf6da38a2ASoC: wm5102: Revert "ASoC: wm5102: Fix PM disable depth imbalance in wm5102_probe" | *94fa250ea5mm: shmem: don't truncate page if memory failure happens | *003fa19591mm: hwpoison: handle non-anonymous THP correctly | *a62b1bc603mm: hwpoison: refactor refcount check handling * |49ca4a5978Revert "bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues" * |6fa2a43acdRevert "ALSA: usb-audio: Yet more regression for for the delayed card registration" * |ac2a7a141fMerge 5.15.79 into android13-5.15-lts |/ *3df0eeae4dLinux 5.15.79 *599b24eedfx86/cpu: Restore AMD's DE_CFG MSR after resume *9132fa043fnet: tun: call napi_schedule_prep() to ensure we own a napi *1dea25e25adrm/amdkfd: Migrate in CPU page fault use current mm *a1c303fbd4marvell: octeontx2: build error: unknown type name 'u64' *d948b22834dmaengine: at_hdmac: Check return code of dma_async_device_register *c556ecf32admaengine: at_hdmac: Fix impossible condition *8a941ff34edmaengine: at_hdmac: Don't allow CPU to reorder channel enable *53831f7a13dmaengine: at_hdmac: Fix completion of unissued descriptor in case of errors *14f5462e4admaengine: at_hdmac: Fix descriptor handling when issuing it to hardware *5482403228dmaengine: at_hdmac: Fix concurrency over the active list *82ca19414fdmaengine: at_hdmac: Free the memset buf without holding the chan lock *8fd36e069ddmaengine: at_hdmac: Fix concurrency over descriptor *1ee012d452dmaengine: at_hdmac: Fix concurrency problems by removing atc_complete_all() *90c1b07406dmaengine: at_hdmac: Protect atchan->status with the channel lock *b5ee1fe06admaengine: at_hdmac: Do not call the complete callback on device_terminate_all *9bbf5df0fcdmaengine: at_hdmac: Fix premature completion of desc in issue_pending *f7d1aaa903dmaengine: 
at_hdmac: Start transfer for cyclic channels in issue_pending *e9777b4efcdmaengine: at_hdmac: Don't start transactions at tx_submit level *4e28674a0edmaengine: at_hdmac: Fix at_lli struct definition *49eba53137cert host tools: Stop complaining about deprecated OpenSSL functions *69e86c6268can: j1939: j1939_send_one(): fix missing CAN header initialization *81fc8f90b8mm/shmem: use page_mapping() to detect page cache for uffd continue *e91451af11mm/memremap.c: map FS_DAX device memory as decrypted *48998c1773mm/damon/dbgfs: check if rm_contexts input is for a real context *c736ed8541udf: Fix a slab-out-of-bounds write bug in udf_find_entry() *2e87eddf57mms: sdhci-esdhc-imx: Fix SDHCI_RESET_ALL for CQHCI *91c38504e5btrfs: zoned: initialize device's zone info for seeding *432c30ba3fbtrfs: selftests: fix wrong error check in btrfs_free_dummy_root() *c9fe4719c6btrfs: fix match incorrectly in dev_args_match_device *f96fd36936wifi: ath11k: avoid deadlock during regulatory update in ath11k_regd_update() *8e2b576cafplatform/x86: hp_wmi: Fix rfkill causing soft blocked wifi *cb3ab0e1e0drm/amdgpu: disable BACO on special BEIGE_GOBY card *dc066a7850drm/i915/dmabuf: fix sg_table handling in map_dma_buf *afbd118838nilfs2: fix use-after-free bug of ns_writer on remount *abc082aac0nilfs2: fix deadlock in nilfs_count_free_blocks() *589da22881ata: libata-scsi: fix SYNCHRONIZE CACHE (16) command failure *51ae4579a5vmlinux.lds.h: Fix placement of '.data..decrypted' section *1f8e08ab32ALSA: usb-audio: Add DSD support for Accuphase DAC-60 *c2451f62b2ALSA: usb-audio: Add quirk entry for M-Audio Micro *031d1480a0ALSA: usb-audio: Yet more regression for for the delayed card registration *574f51e4aaALSA: hda/realtek: Add Positivo C6300 model quirk *7140d7aaf9ALSA: hda: fix potential memleak in 'add_widget_node' *f6d7a487aaALSA: hda/ca0132: add quirk for EVGA Z390 DARK *1ccd55b390ALSA: hda/hdmi - enable runtime pm for more AMD display audio *29100c6742mmc: sdhci-esdhc-imx: use the correct 
host caps for MMC_CAP_8_BIT_DATA *3dce99e2ebmmc: sdhci-tegra: Fix SDHCI_RESET_ALL for CQHCI *9d6bd33e6ammc: sdhci_am654: Fix SDHCI_RESET_ALL for CQHCI *ad01f16ca9mmc: sdhci-of-arasan: Fix SDHCI_RESET_ALL for CQHCI *1aa78c1d01mmc: cqhci: Provide helper for resetting both SDHCI and CQHCI *c198524a99MIPS: jump_label: Fix compat branch range check *9713ceffa4arm64: efi: Fix handling of misaligned runtime regions and drop warning *518e49f059riscv: fix reserved memory setup *d07c3d7491riscv: vdso: fix build with llvm *cc36c7fa5driscv: process: fix kernel info leakage *a8d67367abnet: macvlan: fix memory leaks of macvlan_common_newlink *7b194dd32bethernet: tundra: free irq when alloc ring failed in tsi108_open() *7de10342fenet: mv643xx_eth: disable napi when init rxq or txq failed in mv643xx_eth_open() *88e1dd2d92ethernet: s2io: disable napi when start nic failed in s2io_card_up() *3652f1f8d3net: atlantic: macsec: clear encryption keys from the stack *fca3b0a1fdnet: phy: mscc: macsec: clear encryption keys when freeing a flow *60a0af8813stmmac: dwmac-loongson: fix missing of_node_put() while module exiting *ee4a9bd2c7stmmac: dwmac-loongson: fix missing pci_disable_device() in loongson_dwmac_probe() *4a8770eebcstmmac: dwmac-loongson: fix missing pci_disable_msi() while module exiting *83196d8dc5cxgb4vf: shut down the adapter when t4vf_update_port_info() failed in cxgb4vf_open() *49d8a6e24amctp: Fix an error handling path in mctp_init() *29961d2332stmmac: intel: Update PCH PTP clock rate from 200MHz to 204.8MHz *8604bebc5cstmmac: intel: Enable 2.5Gbps for Intel AlderLake-S *7dec6dae2bnet: cxgb3_main: disable napi when bind qsets failed in cxgb_up() *960f9d30denet: cpsw: disable napi in cpsw_ndo_open() *1360778fdbnet/mlx5e: E-Switch, Fix comparing termination table instance *f13e9ebd29net/mlx5: Allow async trigger completion execution on single CPU systems *48b73b46a5net/mlx5: Bridge, verify LAG state when adding bond to bridge *13b1ea861enet: wwan: iosm: fix memory leak in 
ipc_pcie_read_bios_cfg *7e4dcacb4dnet: nixge: disable napi when enable interrupts failed in nixge_open() *409731df63net: marvell: prestera: fix memory leak in prestera_rxtx_switch_init() *77ff31cba9netfilter: Cleanup nft_net->module_list from nf_tables_exit_net() *e62cb1c093netfilter: nfnetlink: fix potential dead lock in nfnetlink_rcv_msg() *0bd20318daperf tools: Add the include/perf/ directory to .gitignore *a733671e38perf stat: Fix printing os->prefix in CSV metrics output *c36e9e2c4adrivers: net: xgene: disable napi when register irq failed in xgene_enet_open() *4689bd3a1bnet: lapbether: fix issue of invalid opcode in lapbeth_open() *1dd27541aadmaengine: ti: k3-udma-glue: fix memory leak when register device fail *992e966cafdmaengine: mv_xor_v2: Fix a resource leak in mv_xor_v2_remove() *9766af75badmaengine: pxa_dma: use platform_get_irq_optional *301caa0609tipc: fix the msg->req tlv len check in tipc_nl_compat_name_table_dump_header *6a264203dbnet: broadcom: Fix BCMGENET Kconfig *e7871b9a21net: stmmac: dwmac-meson8b: fix meson8b_devm_clk_prepare_enable() *261178a1c2can: af_can: fix NULL pointer dereference in can_rx_register() *2acb2779b1ipv6: addrlabel: fix infoleak when sending struct ifaddrlblmsg to network *13ecaa6832tcp: prohibit TCP_REPAIR_OPTIONS if data was already sent *bc79cb9fb0drm/vc4: Fix missing platform_unregister_drivers() call in vc4_drm_register() *2845bc9070net: wwan: mhi: fix memory leak in mhi_mbim_dellink *2ce2348c28net: wwan: iosm: fix memory leak in ipc_wwan_dellink *7b6bc50f65hamradio: fix issue of dev reference count leakage in bpq_device_event() *f59adebb8cnet: lapbether: fix issue of dev reference count leakage in lapbeth_device_event() *119407dc32KVM: s390: pv: don't allow userspace to set the clock under PV *500bcd3a99phy: ralink: mt7621-pci: add sentinel to quirks table *151dc8087bcapabilities: fix undefined behavior in bit shift for CAP_TO_MASK *435c7ddfd5net: fman: Unregister ethernet device on removal *3a504d6d96bnxt_en: fix 
potentially incorrect return value for ndo_rx_flow_steer *ac257c43fabnxt_en: Fix possible crash in bnxt_hwrm_set_coal() *d7569302a7net: tun: Fix memory leaks of napi_get_frags *430d1f4964octeontx2-pf: NIX TX overwrites SQ_CTX_HW_S[SQ_INT] *ec0db81883octeontx2-pf: Use hardware register for CQE count *b89a0d8859macsec: clear encryption keys from the stack after setting up offload *eeba7f07a0macsec: fix detection of RXSCs when toggling offloading *3070a880ebmacsec: fix secy->n_rx_sc accounting *e957555a36macsec: delete new rxsc when offload fails *ad25a115f5net: gso: fix panic on frag_list with mixed head alloc types *466ce46f25bpf: Fix wrong reg type conversion in release_reference() *35d8130f2abpf: Add helper macro bpf_for_each_reg_in_vstate *61274498fbbpf, sock_map: Move cancel_work_sync() out of sock lock *32b5dd03bebpf: Fix sockmap calling sleepable function in teardown path *e991558189bpf, sockmap: Fix sk->sk_forward_alloc warn_on in sk_stream_kill_queues *5ad95d7134HID: hyperv: fix possible memory leak in mousevsc_probe() *6dcdd1b68bbpftool: Fix NULL pointer dereference when pin {PROG, MAP, LINK} without FILE *2fc902245cwifi: mac80211: Set TWT Information Frame Disabled bit as 1 *95adbd2ac8bpf, sockmap: Fix the sk->sk_forward_alloc warning of sk_stream_kill_queues *06615967d4bpf, verifier: Fix memory leak in array reallocation for stack state *4335a82c4fsoundwire: qcom: check for outanding writes before doing a read *ae4dad2e53soundwire: qcom: reinit broadcast completion *38c9fa2cc6wifi: cfg80211: fix memory leak in query_regdb_file() *2c6ba0a787wifi: cfg80211: silence a sparse RCU warning *921738c280phy: stm32: fix an error code in probe *fa722006f7hwspinlock: qcom: correct MMIO max register for newer SoCs *3c1bb6187edrm/amdkfd: Fix NULL pointer dereference in svm_migrate_to_ram() *b1f8522771drm/amdkfd: handle CPU fault on COW mapping *36770c045adrm/amdkfd: avoid recursive lock in migrations back to RAM *93a5de7e88fuse: fix readdir cache race 
*1920cf9454thunderbolt: Add DP OUT resource when DP tunnel is discovered *47dbf24969thunderbolt: Tear down existing tunnels when resuming from hibernate And update the .xml file with the new symbol that we are tracking and the abi preservation fix: 1 function symbol(s) added 'void __dev_kfree_skb_irq(struct sk_buff *, enum skb_free_reason)' type 'struct sdhci_host' changed member 'union { struct { u8 reinit_uhs; u8 reserve01; u8 drv_type; u16 reserve02; u32 reserve03; }; struct { u64 android_kabi_reserved1; }; union { }; }' was added member 'u64 android_kabi_reserved1' was removed Change-Id: If4a059230a137dee54298fff61ec87306bf96b0f Signed-off-by: Greg Kroah-Hartman <gregkh@google.com>
6856 lines
210 KiB
C
6856 lines
210 KiB
C
// SPDX-License-Identifier: GPL-2.0
|
|
|
|
#include <linux/objtool.h>
|
|
#include <linux/percpu.h>
|
|
|
|
#include <asm/debugreg.h>
|
|
#include <asm/mmu_context.h>
|
|
|
|
#include "cpuid.h"
|
|
#include "hyperv.h"
|
|
#include "mmu.h"
|
|
#include "nested.h"
|
|
#include "pmu.h"
|
|
#include "sgx.h"
|
|
#include "trace.h"
|
|
#include "vmx.h"
|
|
#include "x86.h"
|
|
|
|
static bool __read_mostly enable_shadow_vmcs = 1;
|
|
module_param_named(enable_shadow_vmcs, enable_shadow_vmcs, bool, S_IRUGO);
|
|
|
|
static bool __read_mostly nested_early_check = 0;
|
|
module_param(nested_early_check, bool, S_IRUGO);
|
|
|
|
/* Local shorthand for KVM_NESTED_VMENTER_CONSISTENCY_CHECK. */
#define CC KVM_NESTED_VMENTER_CONSISTENCY_CHECK

/*
 * All four VPID extent types are advertised as supported because Hyper-V
 * requires every one of them, even though each is simply treated the same
 * as all-context.
 */
#define VMX_VPID_EXTENT_SUPPORTED_MASK			\
	(VMX_VPID_EXTENT_INDIVIDUAL_ADDR_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_GLOBAL_CONTEXT_BIT |		\
	 VMX_VPID_EXTENT_SINGLE_NON_GLOBAL_BIT)

/*
 * NOTE(review): presumably the rate value reported for the emulated VMX
 * preemption timer -- confirm against the users of this constant.
 */
#define VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE 5
|
|
|
|
/* Indices into vmx_bitmap[]; VMX_BITMAP_NR is the number of bitmaps. */
enum {
	VMX_VMREAD_BITMAP = 0,
	VMX_VMWRITE_BITMAP = 1,
	VMX_BITMAP_NR = 2
};

/* One pointer per bitmap, indexed by the enum above. */
static unsigned long *vmx_bitmap[VMX_BITMAP_NR];

/* Named accessors for the two slots of vmx_bitmap[]. */
#define vmx_vmread_bitmap	(vmx_bitmap[VMX_VMREAD_BITMAP])
#define vmx_vmwrite_bitmap	(vmx_bitmap[VMX_VMWRITE_BITMAP])
|
|
|
|
struct shadow_vmcs_field {
|
|
u16 encoding;
|
|
u16 offset;
|
|
};
|
|
/*
 * Table of read-only shadow fields.  SHADOW_FIELD_RO() must be defined
 * before vmcs_shadow_fields.h is pulled in; presumably the header invokes
 * it once per read-only field (confirm in the header), so that each
 * invocation contributes one { encoding, offset } entry here.
 */
static struct shadow_vmcs_field shadow_read_only_fields[] = {
#define SHADOW_FIELD_RO(x, y) \
	{ .encoding = (x), .offset = offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};

/* Number of valid entries; starts out as the full table size. */
static int max_shadow_read_only_fields = ARRAY_SIZE(shadow_read_only_fields);
|
|
|
|
/*
 * Table of read/write shadow fields.  SHADOW_FIELD_RW() must be defined
 * before vmcs_shadow_fields.h is pulled in; presumably the header invokes
 * it once per read/write field (confirm in the header), so that each
 * invocation contributes one { encoding, offset } entry here.
 */
static struct shadow_vmcs_field shadow_read_write_fields[] = {
#define SHADOW_FIELD_RW(x, y) \
	{ .encoding = (x), .offset = offsetof(struct vmcs12, y) },
#include "vmcs_shadow_fields.h"
};

/* Number of valid entries; starts out as the full table size. */
static int max_shadow_read_write_fields = ARRAY_SIZE(shadow_read_write_fields);
|
|
|
|
static void init_vmcs_shadow_fields(void)
|
|
{
|
|
int i, j;
|
|
|
|
memset(vmx_vmread_bitmap, 0xff, PAGE_SIZE);
|
|
memset(vmx_vmwrite_bitmap, 0xff, PAGE_SIZE);
|
|
|
|
for (i = j = 0; i < max_shadow_read_only_fields; i++) {
|
|
struct shadow_vmcs_field entry = shadow_read_only_fields[i];
|
|
u16 field = entry.encoding;
|
|
|
|
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
|
|
(i + 1 == max_shadow_read_only_fields ||
|
|
shadow_read_only_fields[i + 1].encoding != field + 1))
|
|
pr_err("Missing field from shadow_read_only_field %x\n",
|
|
field + 1);
|
|
|
|
clear_bit(field, vmx_vmread_bitmap);
|
|
if (field & 1)
|
|
#ifdef CONFIG_X86_64
|
|
continue;
|
|
#else
|
|
entry.offset += sizeof(u32);
|
|
#endif
|
|
shadow_read_only_fields[j++] = entry;
|
|
}
|
|
max_shadow_read_only_fields = j;
|
|
|
|
for (i = j = 0; i < max_shadow_read_write_fields; i++) {
|
|
struct shadow_vmcs_field entry = shadow_read_write_fields[i];
|
|
u16 field = entry.encoding;
|
|
|
|
if (vmcs_field_width(field) == VMCS_FIELD_WIDTH_U64 &&
|
|
(i + 1 == max_shadow_read_write_fields ||
|
|
shadow_read_write_fields[i + 1].encoding != field + 1))
|
|
pr_err("Missing field from shadow_read_write_field %x\n",
|
|
field + 1);
|
|
|
|
WARN_ONCE(field >= GUEST_ES_AR_BYTES &&
|
|
field <= GUEST_TR_AR_BYTES,
|
|
"Update vmcs12_write_any() to drop reserved bits from AR_BYTES");
|
|
|
|
/*
|
|
* PML and the preemption timer can be emulated, but the
|
|
* processor cannot vmwrite to fields that don't exist
|
|
* on bare metal.
|
|
*/
|
|
switch (field) {
|
|
case GUEST_PML_INDEX:
|
|
if (!cpu_has_vmx_pml())
|
|
continue;
|
|
break;
|
|
case VMX_PREEMPTION_TIMER_VALUE:
|
|
if (!cpu_has_vmx_preemption_timer())
|
|
continue;
|
|
break;
|
|
case GUEST_INTR_STATUS:
|
|
if (!cpu_has_vmx_apicv())
|
|
continue;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
clear_bit(field, vmx_vmwrite_bitmap);
|
|
clear_bit(field, vmx_vmread_bitmap);
|
|
if (field & 1)
|
|
#ifdef CONFIG_X86_64
|
|
continue;
|
|
#else
|
|
entry.offset += sizeof(u32);
|
|
#endif
|
|
shadow_read_write_fields[j++] = entry;
|
|
}
|
|
max_shadow_read_write_fields = j;
|
|
}
|
|
|
|
/*
|
|
* The following 3 functions, nested_vmx_succeed()/failValid()/failInvalid(),
|
|
* set the success or error code of an emulated VMX instruction (as specified
|
|
* by Vol 2B, VMX Instruction Reference, "Conventions"), and skip the emulated
|
|
* instruction.
|
|
*/
|
|
static int nested_vmx_succeed(struct kvm_vcpu *vcpu)
|
|
{
|
|
vmx_set_rflags(vcpu, vmx_get_rflags(vcpu)
|
|
& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
|
|
X86_EFLAGS_ZF | X86_EFLAGS_SF | X86_EFLAGS_OF));
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
|
|
static int nested_vmx_failInvalid(struct kvm_vcpu *vcpu)
|
|
{
|
|
vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
|
|
& ~(X86_EFLAGS_PF | X86_EFLAGS_AF | X86_EFLAGS_ZF |
|
|
X86_EFLAGS_SF | X86_EFLAGS_OF))
|
|
| X86_EFLAGS_CF);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
|
|
static int nested_vmx_failValid(struct kvm_vcpu *vcpu,
|
|
u32 vm_instruction_error)
|
|
{
|
|
vmx_set_rflags(vcpu, (vmx_get_rflags(vcpu)
|
|
& ~(X86_EFLAGS_CF | X86_EFLAGS_PF | X86_EFLAGS_AF |
|
|
X86_EFLAGS_SF | X86_EFLAGS_OF))
|
|
| X86_EFLAGS_ZF);
|
|
get_vmcs12(vcpu)->vm_instruction_error = vm_instruction_error;
|
|
/*
|
|
* We don't need to force sync to shadow VMCS because
|
|
* VM_INSTRUCTION_ERROR is not shadowed. Enlightened VMCS 'shadows' all
|
|
* fields and thus must be synced.
|
|
*/
|
|
if (to_vmx(vcpu)->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
|
|
to_vmx(vcpu)->nested.need_vmcs12_to_shadow_sync = true;
|
|
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
|
|
/*
 * Fail an emulated VMX instruction with @vm_instruction_error.
 *
 * VMfailValid stores the error number in the current VMCS, so it is only
 * usable when one exists (a current vmptr or a valid enlightened VMCS
 * pointer); otherwise VMfailInvalid is signalled instead.
 */
static int nested_vmx_fail(struct kvm_vcpu *vcpu, u32 vm_instruction_error)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);

	if (vmx->nested.current_vmptr != -1ull ||
	    evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
		return nested_vmx_failValid(vcpu, vm_instruction_error);

	return nested_vmx_failInvalid(vcpu);
}
|
|
|
|
/*
 * Handle a nested VMX abort identified by @indicator: a triple fault is
 * requested for the vCPU and a rate-limited debug message is logged.
 */
static void nested_vmx_abort(struct kvm_vcpu *vcpu, u32 indicator)
{
	/* TODO: not to reset guest simply here. */
	kvm_make_request(KVM_REQ_TRIPLE_FAULT, vcpu);
	/* @indicator is unsigned, so print it with %u rather than %d. */
	pr_debug_ratelimited("kvm: nested vmx abort, indicator %u\n", indicator);
}
|
|
|
|
/*
 * Check @control against the @low/@high pair of a VMX control MSR by
 * delegating to fixed_bits_valid().
 */
static inline bool vmx_control_verify(u32 control, u32 low, u32 high)
{
	bool valid = fixed_bits_valid(control, low, high);

	return valid;
}
|
|
|
|
/* Combine two 32-bit halves into a 64-bit MSR value: @high:@low. */
static inline u64 vmx_control_msr(u32 low, u32 high)
{
	u64 value = (u64)high << 32;

	return value | low;
}
|
|
|
|
static void vmx_disable_shadow_vmcs(struct vcpu_vmx *vmx)
|
|
{
|
|
secondary_exec_controls_clearbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
|
|
vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
|
vmx->nested.need_vmcs12_to_shadow_sync = false;
|
|
}
|
|
|
|
static inline void nested_release_evmcs(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.hv_evmcs_map, true);
|
|
vmx->nested.hv_evmcs = NULL;
|
|
}
|
|
|
|
vmx->nested.hv_evmcs_vmptr = EVMPTR_INVALID;
|
|
}
|
|
|
|
static void vmx_sync_vmcs_host_state(struct vcpu_vmx *vmx,
|
|
struct loaded_vmcs *prev)
|
|
{
|
|
struct vmcs_host_state *dest, *src;
|
|
|
|
if (unlikely(!vmx->guest_state_loaded))
|
|
return;
|
|
|
|
src = &prev->host_state;
|
|
dest = &vmx->loaded_vmcs->host_state;
|
|
|
|
vmx_set_host_fs_gs(dest, src->fs_sel, src->gs_sel, src->fs_base, src->gs_base);
|
|
dest->ldt_sel = src->ldt_sel;
|
|
#ifdef CONFIG_X86_64
|
|
dest->ds_sel = src->ds_sel;
|
|
dest->es_sel = src->es_sel;
|
|
#endif
|
|
}
|
|
|
|
static void vmx_switch_vmcs(struct kvm_vcpu *vcpu, struct loaded_vmcs *vmcs)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct loaded_vmcs *prev;
|
|
int cpu;
|
|
|
|
if (WARN_ON_ONCE(vmx->loaded_vmcs == vmcs))
|
|
return;
|
|
|
|
cpu = get_cpu();
|
|
prev = vmx->loaded_vmcs;
|
|
vmx->loaded_vmcs = vmcs;
|
|
vmx_vcpu_load_vmcs(vcpu, cpu, prev);
|
|
vmx_sync_vmcs_host_state(vmx, prev);
|
|
put_cpu();
|
|
|
|
vmx_register_cache_reset(vcpu);
|
|
}
|
|
|
|
/*
|
|
* Free whatever needs to be freed from vmx->nested when L1 goes down, or
|
|
* just stops using VMX.
|
|
*/
|
|
static void free_nested(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01))
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
|
|
if (!vmx->nested.vmxon && !vmx->nested.smm.vmxon)
|
|
return;
|
|
|
|
kvm_clear_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
|
|
|
|
vmx->nested.vmxon = false;
|
|
vmx->nested.smm.vmxon = false;
|
|
free_vpid(vmx->nested.vpid02);
|
|
vmx->nested.posted_intr_nv = -1;
|
|
vmx->nested.current_vmptr = -1ull;
|
|
if (enable_shadow_vmcs) {
|
|
vmx_disable_shadow_vmcs(vmx);
|
|
vmcs_clear(vmx->vmcs01.shadow_vmcs);
|
|
free_vmcs(vmx->vmcs01.shadow_vmcs);
|
|
vmx->vmcs01.shadow_vmcs = NULL;
|
|
}
|
|
kfree(vmx->nested.cached_vmcs12);
|
|
vmx->nested.cached_vmcs12 = NULL;
|
|
kfree(vmx->nested.cached_shadow_vmcs12);
|
|
vmx->nested.cached_shadow_vmcs12 = NULL;
|
|
/* Unpin physical memory we referred to in the vmcs02 */
|
|
if (vmx->nested.apic_access_page) {
|
|
kvm_release_page_clean(vmx->nested.apic_access_page);
|
|
vmx->nested.apic_access_page = NULL;
|
|
}
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
|
|
vmx->nested.pi_desc = NULL;
|
|
|
|
kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
|
|
|
|
nested_release_evmcs(vcpu);
|
|
|
|
free_loaded_vmcs(&vmx->nested.vmcs02);
|
|
}
|
|
|
|
/*
 * Leave nested operation for @vcpu.  The vCPU is loaded around the call to
 * vmx_leave_nested() so that, before free_nested() runs, the current VMCS of
 * the logical processor is the vmcs01 of the vCPU.
 */
void nested_vmx_free_vcpu(struct kvm_vcpu *vcpu)
{
	vcpu_load(vcpu);

	vmx_leave_nested(vcpu);

	vcpu_put(vcpu);
}
|
|
|
|
/* Bits 51:12 of an EPTP; nested_ept_root_matches() compares roots on these bits only. */
#define EPTP_PA_MASK GENMASK_ULL(51, 12)
|
|
|
|
/*
 * Return true when @root_hpa is a valid page and @root_eptp and @eptp agree
 * on the bits selected by EPTP_PA_MASK.
 */
static bool nested_ept_root_matches(hpa_t root_hpa, u64 root_eptp, u64 eptp)
{
	if (!VALID_PAGE(root_hpa))
		return false;

	return (root_eptp & EPTP_PA_MASK) == (eptp & EPTP_PA_MASK);
}
|
|
|
|
/*
 * Invoke the MMU's invlpg callback for @addr on every cached previous root
 * whose pgd matches @eptp according to nested_ept_root_matches().  Warns
 * once if the vCPU's MMU is not nested.
 */
static void nested_ept_invalidate_addr(struct kvm_vcpu *vcpu, gpa_t eptp,
				       gpa_t addr)
{
	struct kvm_mmu_root_info *root;
	uint idx;

	WARN_ON_ONCE(!mmu_is_nested(vcpu));

	for (idx = 0; idx < KVM_MMU_NUM_PREV_ROOTS; idx++) {
		root = &vcpu->arch.mmu->prev_roots[idx];

		if (!nested_ept_root_matches(root->hpa, root->pgd, eptp))
			continue;

		vcpu->arch.mmu->invlpg(vcpu, addr, root->hpa);
	}
}
|
|
|
|
static void nested_ept_inject_page_fault(struct kvm_vcpu *vcpu,
|
|
struct x86_exception *fault)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u32 vm_exit_reason;
|
|
unsigned long exit_qualification = vcpu->arch.exit_qualification;
|
|
|
|
if (vmx->nested.pml_full) {
|
|
vm_exit_reason = EXIT_REASON_PML_FULL;
|
|
vmx->nested.pml_full = false;
|
|
exit_qualification &= INTR_INFO_UNBLOCK_NMI;
|
|
} else {
|
|
if (fault->error_code & PFERR_RSVD_MASK)
|
|
vm_exit_reason = EXIT_REASON_EPT_MISCONFIG;
|
|
else
|
|
vm_exit_reason = EXIT_REASON_EPT_VIOLATION;
|
|
|
|
/*
|
|
* Although the caller (kvm_inject_emulated_page_fault) would
|
|
* have already synced the faulting address in the shadow EPT
|
|
* tables for the current EPTP12, we also need to sync it for
|
|
* any other cached EPTP02s based on the same EP4TA, since the
|
|
* TLB associates mappings to the EP4TA rather than the full EPTP.
|
|
*/
|
|
nested_ept_invalidate_addr(vcpu, vmcs12->ept_pointer,
|
|
fault->address);
|
|
}
|
|
|
|
nested_vmx_vmexit(vcpu, vm_exit_reason, 0, exit_qualification);
|
|
vmcs12->guest_physical_address = fault->address;
|
|
}
|
|
|
|
static void nested_ept_new_eptp(struct kvm_vcpu *vcpu)
|
|
{
|
|
kvm_init_shadow_ept_mmu(vcpu,
|
|
to_vmx(vcpu)->nested.msrs.ept_caps &
|
|
VMX_EPT_EXECUTE_ONLY_BIT,
|
|
nested_ept_ad_enabled(vcpu),
|
|
nested_ept_get_eptp(vcpu));
|
|
}
|
|
|
|
static void nested_ept_init_mmu_context(struct kvm_vcpu *vcpu)
|
|
{
|
|
WARN_ON(mmu_is_nested(vcpu));
|
|
|
|
vcpu->arch.mmu = &vcpu->arch.guest_mmu;
|
|
nested_ept_new_eptp(vcpu);
|
|
vcpu->arch.mmu->get_guest_pgd = nested_ept_get_eptp;
|
|
vcpu->arch.mmu->inject_page_fault = nested_ept_inject_page_fault;
|
|
vcpu->arch.mmu->get_pdptr = kvm_pdptr_read;
|
|
|
|
vcpu->arch.walk_mmu = &vcpu->arch.nested_mmu;
|
|
}
|
|
|
|
static void nested_ept_uninit_mmu_context(struct kvm_vcpu *vcpu)
|
|
{
|
|
vcpu->arch.mmu = &vcpu->arch.root_mmu;
|
|
vcpu->arch.walk_mmu = &vcpu->arch.root_mmu;
|
|
}
|
|
|
|
static bool nested_vmx_is_page_fault_vmexit(struct vmcs12 *vmcs12,
|
|
u16 error_code)
|
|
{
|
|
bool inequality, bit;
|
|
|
|
bit = (vmcs12->exception_bitmap & (1u << PF_VECTOR)) != 0;
|
|
inequality =
|
|
(error_code & vmcs12->page_fault_error_code_mask) !=
|
|
vmcs12->page_fault_error_code_match;
|
|
return inequality ^ bit;
|
|
}
|
|
|
|
|
|
/*
|
|
* KVM wants to inject page-faults which it got to the guest. This function
|
|
* checks whether in a nested guest, we need to inject them to L1 or L2.
|
|
*/
|
|
static int nested_vmx_check_exception(struct kvm_vcpu *vcpu, unsigned long *exit_qual)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
unsigned int nr = vcpu->arch.exception.nr;
|
|
bool has_payload = vcpu->arch.exception.has_payload;
|
|
unsigned long payload = vcpu->arch.exception.payload;
|
|
|
|
if (nr == PF_VECTOR) {
|
|
if (vcpu->arch.exception.nested_apf) {
|
|
*exit_qual = vcpu->arch.apf.nested_apf_token;
|
|
return 1;
|
|
}
|
|
if (nested_vmx_is_page_fault_vmexit(vmcs12,
|
|
vcpu->arch.exception.error_code)) {
|
|
*exit_qual = has_payload ? payload : vcpu->arch.cr2;
|
|
return 1;
|
|
}
|
|
} else if (vmcs12->exception_bitmap & (1u << nr)) {
|
|
if (nr == DB_VECTOR) {
|
|
if (!has_payload) {
|
|
payload = vcpu->arch.dr6;
|
|
payload &= ~DR6_BT;
|
|
payload ^= DR6_ACTIVE_LOW;
|
|
}
|
|
*exit_qual = payload;
|
|
} else
|
|
*exit_qual = 0;
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
|
|
static void vmx_inject_page_fault_nested(struct kvm_vcpu *vcpu,
|
|
struct x86_exception *fault)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
|
WARN_ON(!is_guest_mode(vcpu));
|
|
|
|
if (nested_vmx_is_page_fault_vmexit(vmcs12, fault->error_code) &&
|
|
!to_vmx(vcpu)->nested.nested_run_pending) {
|
|
vmcs12->vm_exit_intr_error_code = fault->error_code;
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
|
PF_VECTOR | INTR_TYPE_HARD_EXCEPTION |
|
|
INTR_INFO_DELIVER_CODE_MASK | INTR_INFO_VALID_MASK,
|
|
fault->address);
|
|
} else {
|
|
kvm_inject_page_fault(vcpu, fault);
|
|
}
|
|
}
|
|
|
|
static int nested_vmx_check_io_bitmap_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
|
|
return 0;
|
|
|
|
if (CC(!page_address_valid(vcpu, vmcs12->io_bitmap_a)) ||
|
|
CC(!page_address_valid(vcpu, vmcs12->io_bitmap_b)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_msr_bitmap_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
|
return 0;
|
|
|
|
if (CC(!page_address_valid(vcpu, vmcs12->msr_bitmap)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_tpr_shadow_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW))
|
|
return 0;
|
|
|
|
if (CC(!page_address_valid(vcpu, vmcs12->virtual_apic_page_addr)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* If a msr is allowed by L0, we should check whether it is allowed by L1.
|
|
* The corresponding bit will be cleared unless both of L0 and L1 allow it.
|
|
*/
|
|
static void nested_vmx_disable_intercept_for_msr(unsigned long *msr_bitmap_l1,
|
|
unsigned long *msr_bitmap_nested,
|
|
u32 msr, int type)
|
|
{
|
|
int f = sizeof(unsigned long);
|
|
|
|
/*
|
|
* See Intel PRM Vol. 3, 20.6.9 (MSR-Bitmap Address). Early manuals
|
|
* have the write-low and read-high bitmap offsets the wrong way round.
|
|
* We can control MSRs 0x00000000-0x00001fff and 0xc0000000-0xc0001fff.
|
|
*/
|
|
if (msr <= 0x1fff) {
|
|
if (type & MSR_TYPE_R &&
|
|
!test_bit(msr, msr_bitmap_l1 + 0x000 / f))
|
|
/* read-low */
|
|
__clear_bit(msr, msr_bitmap_nested + 0x000 / f);
|
|
|
|
if (type & MSR_TYPE_W &&
|
|
!test_bit(msr, msr_bitmap_l1 + 0x800 / f))
|
|
/* write-low */
|
|
__clear_bit(msr, msr_bitmap_nested + 0x800 / f);
|
|
|
|
} else if ((msr >= 0xc0000000) && (msr <= 0xc0001fff)) {
|
|
msr &= 0x1fff;
|
|
if (type & MSR_TYPE_R &&
|
|
!test_bit(msr, msr_bitmap_l1 + 0x400 / f))
|
|
/* read-high */
|
|
__clear_bit(msr, msr_bitmap_nested + 0x400 / f);
|
|
|
|
if (type & MSR_TYPE_W &&
|
|
!test_bit(msr, msr_bitmap_l1 + 0xc00 / f))
|
|
/* write-high */
|
|
__clear_bit(msr, msr_bitmap_nested + 0xc00 / f);
|
|
|
|
}
|
|
}
|
|
|
|
static inline void enable_x2apic_msr_intercepts(unsigned long *msr_bitmap)
|
|
{
|
|
int msr;
|
|
|
|
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
|
|
unsigned word = msr / BITS_PER_LONG;
|
|
|
|
msr_bitmap[word] = ~0;
|
|
msr_bitmap[word + (0x800 / sizeof(long))] = ~0;
|
|
}
|
|
}
|
|
|
|
#define BUILD_NVMX_MSR_INTERCEPT_HELPER(rw) \
|
|
static inline \
|
|
void nested_vmx_set_msr_##rw##_intercept(struct vcpu_vmx *vmx, \
|
|
unsigned long *msr_bitmap_l1, \
|
|
unsigned long *msr_bitmap_l0, u32 msr) \
|
|
{ \
|
|
if (vmx_test_msr_bitmap_##rw(vmx->vmcs01.msr_bitmap, msr) || \
|
|
vmx_test_msr_bitmap_##rw(msr_bitmap_l1, msr)) \
|
|
vmx_set_msr_bitmap_##rw(msr_bitmap_l0, msr); \
|
|
else \
|
|
vmx_clear_msr_bitmap_##rw(msr_bitmap_l0, msr); \
|
|
}
|
|
BUILD_NVMX_MSR_INTERCEPT_HELPER(read)
|
|
BUILD_NVMX_MSR_INTERCEPT_HELPER(write)
|
|
|
|
static inline void nested_vmx_set_intercept_for_msr(struct vcpu_vmx *vmx,
|
|
unsigned long *msr_bitmap_l1,
|
|
unsigned long *msr_bitmap_l0,
|
|
u32 msr, int types)
|
|
{
|
|
if (types & MSR_TYPE_R)
|
|
nested_vmx_set_msr_read_intercept(vmx, msr_bitmap_l1,
|
|
msr_bitmap_l0, msr);
|
|
if (types & MSR_TYPE_W)
|
|
nested_vmx_set_msr_write_intercept(vmx, msr_bitmap_l1,
|
|
msr_bitmap_l0, msr);
|
|
}
|
|
|
|
/*
|
|
* Merge L0's and L1's MSR bitmap, return false to indicate that
|
|
* we do not use the hardware.
|
|
*/
|
|
static inline bool nested_vmx_prepare_msr_bitmap(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
int msr;
|
|
unsigned long *msr_bitmap_l1;
|
|
unsigned long *msr_bitmap_l0 = vmx->nested.vmcs02.msr_bitmap;
|
|
struct kvm_host_map *map = &vmx->nested.msr_bitmap_map;
|
|
|
|
/* Nothing to do if the MSR bitmap is not in use. */
|
|
if (!cpu_has_vmx_msr_bitmap() ||
|
|
!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
|
return false;
|
|
|
|
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->msr_bitmap), map))
|
|
return false;
|
|
|
|
msr_bitmap_l1 = (unsigned long *)map->hva;
|
|
|
|
/*
|
|
* To keep the control flow simple, pay eight 8-byte writes (sixteen
|
|
* 4-byte writes on 32-bit systems) up front to enable intercepts for
|
|
* the x2APIC MSR range and selectively disable them below.
|
|
*/
|
|
enable_x2apic_msr_intercepts(msr_bitmap_l0);
|
|
|
|
if (nested_cpu_has_virt_x2apic_mode(vmcs12)) {
|
|
if (nested_cpu_has_apic_reg_virt(vmcs12)) {
|
|
/*
|
|
* L0 need not intercept reads for MSRs between 0x800
|
|
* and 0x8ff, it just lets the processor take the value
|
|
* from the virtual-APIC page; take those 256 bits
|
|
* directly from the L1 bitmap.
|
|
*/
|
|
for (msr = 0x800; msr <= 0x8ff; msr += BITS_PER_LONG) {
|
|
unsigned word = msr / BITS_PER_LONG;
|
|
|
|
msr_bitmap_l0[word] = msr_bitmap_l1[word];
|
|
}
|
|
}
|
|
|
|
nested_vmx_disable_intercept_for_msr(
|
|
msr_bitmap_l1, msr_bitmap_l0,
|
|
X2APIC_MSR(APIC_TASKPRI),
|
|
MSR_TYPE_R | MSR_TYPE_W);
|
|
|
|
if (nested_cpu_has_vid(vmcs12)) {
|
|
nested_vmx_disable_intercept_for_msr(
|
|
msr_bitmap_l1, msr_bitmap_l0,
|
|
X2APIC_MSR(APIC_EOI),
|
|
MSR_TYPE_W);
|
|
nested_vmx_disable_intercept_for_msr(
|
|
msr_bitmap_l1, msr_bitmap_l0,
|
|
X2APIC_MSR(APIC_SELF_IPI),
|
|
MSR_TYPE_W);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Always check vmcs01's bitmap to honor userspace MSR filters and any
|
|
* other runtime changes to vmcs01's bitmap, e.g. dynamic pass-through.
|
|
*/
|
|
#ifdef CONFIG_X86_64
|
|
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
|
MSR_FS_BASE, MSR_TYPE_RW);
|
|
|
|
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
|
MSR_GS_BASE, MSR_TYPE_RW);
|
|
|
|
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
|
MSR_KERNEL_GS_BASE, MSR_TYPE_RW);
|
|
#endif
|
|
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
|
MSR_IA32_SPEC_CTRL, MSR_TYPE_RW);
|
|
|
|
nested_vmx_set_intercept_for_msr(vmx, msr_bitmap_l1, msr_bitmap_l0,
|
|
MSR_IA32_PRED_CMD, MSR_TYPE_W);
|
|
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.msr_bitmap_map, false);
|
|
|
|
return true;
|
|
}
|
|
|
|
static void nested_cache_shadow_vmcs12(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct kvm_host_map map;
|
|
struct vmcs12 *shadow;
|
|
|
|
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
|
|
vmcs12->vmcs_link_pointer == -1ull)
|
|
return;
|
|
|
|
shadow = get_shadow_vmcs12(vcpu);
|
|
|
|
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map))
|
|
return;
|
|
|
|
memcpy(shadow, map.hva, VMCS12_SIZE);
|
|
kvm_vcpu_unmap(vcpu, &map, false);
|
|
}
|
|
|
|
static void nested_flush_cached_shadow_vmcs12(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (!nested_cpu_has_shadow_vmcs(vmcs12) ||
|
|
vmcs12->vmcs_link_pointer == -1ull)
|
|
return;
|
|
|
|
kvm_write_guest(vmx->vcpu.kvm, vmcs12->vmcs_link_pointer,
|
|
get_shadow_vmcs12(vcpu), VMCS12_SIZE);
|
|
}
|
|
|
|
/*
|
|
* In nested virtualization, check if L1 has set
|
|
* VM_EXIT_ACK_INTR_ON_EXIT
|
|
*/
|
|
static bool nested_exit_intr_ack_set(struct kvm_vcpu *vcpu)
|
|
{
|
|
return get_vmcs12(vcpu)->vm_exit_controls &
|
|
VM_EXIT_ACK_INTR_ON_EXIT;
|
|
}
|
|
|
|
static int nested_vmx_check_apic_access_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES) &&
|
|
CC(!page_address_valid(vcpu, vmcs12->apic_access_addr)))
|
|
return -EINVAL;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_apicv_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
|
!nested_cpu_has_apic_reg_virt(vmcs12) &&
|
|
!nested_cpu_has_vid(vmcs12) &&
|
|
!nested_cpu_has_posted_intr(vmcs12))
|
|
return 0;
|
|
|
|
/*
|
|
* If virtualize x2apic mode is enabled,
|
|
* virtualize apic access must be disabled.
|
|
*/
|
|
if (CC(nested_cpu_has_virt_x2apic_mode(vmcs12) &&
|
|
nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If virtual interrupt delivery is enabled,
|
|
* we must exit on external interrupts.
|
|
*/
|
|
if (CC(nested_cpu_has_vid(vmcs12) && !nested_exit_on_intr(vcpu)))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* bits 15:8 should be zero in posted_intr_nv,
|
|
* the descriptor address has been already checked
|
|
* in nested_get_vmcs12_pages.
|
|
*
|
|
* bits 5:0 of posted_intr_desc_addr should be zero.
|
|
*/
|
|
if (nested_cpu_has_posted_intr(vmcs12) &&
|
|
(CC(!nested_cpu_has_vid(vmcs12)) ||
|
|
CC(!nested_exit_intr_ack_set(vcpu)) ||
|
|
CC((vmcs12->posted_intr_nv & 0xff00)) ||
|
|
CC(!kvm_vcpu_is_legal_aligned_gpa(vcpu, vmcs12->posted_intr_desc_addr, 64))))
|
|
return -EINVAL;
|
|
|
|
/* tpr shadow is needed by all apicv features. */
|
|
if (CC(!nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_msr_switch(struct kvm_vcpu *vcpu,
|
|
u32 count, u64 addr)
|
|
{
|
|
if (count == 0)
|
|
return 0;
|
|
|
|
if (!kvm_vcpu_is_legal_aligned_gpa(vcpu, addr, 16) ||
|
|
!kvm_vcpu_is_legal_gpa(vcpu, (addr + count * sizeof(struct vmx_msr_entry) - 1)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_exit_msr_switch_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(nested_vmx_check_msr_switch(vcpu,
|
|
vmcs12->vm_exit_msr_load_count,
|
|
vmcs12->vm_exit_msr_load_addr)) ||
|
|
CC(nested_vmx_check_msr_switch(vcpu,
|
|
vmcs12->vm_exit_msr_store_count,
|
|
vmcs12->vm_exit_msr_store_addr)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_entry_msr_switch_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(nested_vmx_check_msr_switch(vcpu,
|
|
vmcs12->vm_entry_msr_load_count,
|
|
vmcs12->vm_entry_msr_load_addr)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_pml_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has_pml(vmcs12))
|
|
return 0;
|
|
|
|
if (CC(!nested_cpu_has_ept(vmcs12)) ||
|
|
CC(!page_address_valid(vcpu, vmcs12->pml_address)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_unrestricted_guest_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST) &&
|
|
!nested_cpu_has_ept(vmcs12)))
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_mode_based_ept_exec_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(nested_cpu_has2(vmcs12, SECONDARY_EXEC_MODE_BASED_EPT_EXEC) &&
|
|
!nested_cpu_has_ept(vmcs12)))
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_shadow_vmcs_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (!nested_cpu_has_shadow_vmcs(vmcs12))
|
|
return 0;
|
|
|
|
if (CC(!page_address_valid(vcpu, vmcs12->vmread_bitmap)) ||
|
|
CC(!page_address_valid(vcpu, vmcs12->vmwrite_bitmap)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_msr_check_common(struct kvm_vcpu *vcpu,
|
|
struct vmx_msr_entry *e)
|
|
{
|
|
/* x2APIC MSR accesses are not allowed */
|
|
if (CC(vcpu->arch.apic_base & X2APIC_ENABLE && e->index >> 8 == 0x8))
|
|
return -EINVAL;
|
|
if (CC(e->index == MSR_IA32_UCODE_WRITE) || /* SDM Table 35-2 */
|
|
CC(e->index == MSR_IA32_UCODE_REV))
|
|
return -EINVAL;
|
|
if (CC(e->reserved != 0))
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_load_msr_check(struct kvm_vcpu *vcpu,
|
|
struct vmx_msr_entry *e)
|
|
{
|
|
if (CC(e->index == MSR_FS_BASE) ||
|
|
CC(e->index == MSR_GS_BASE) ||
|
|
CC(e->index == MSR_IA32_SMM_MONITOR_CTL) || /* SMM is not supported */
|
|
nested_vmx_msr_check_common(vcpu, e))
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_store_msr_check(struct kvm_vcpu *vcpu,
|
|
struct vmx_msr_entry *e)
|
|
{
|
|
if (CC(e->index == MSR_IA32_SMBASE) || /* SMM is not supported */
|
|
nested_vmx_msr_check_common(vcpu, e))
|
|
return -EINVAL;
|
|
return 0;
|
|
}
|
|
|
|
static u32 nested_vmx_max_atomic_switch_msrs(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u64 vmx_misc = vmx_control_msr(vmx->nested.msrs.misc_low,
|
|
vmx->nested.msrs.misc_high);
|
|
|
|
return (vmx_misc_max_msr(vmx_misc) + 1) * VMX_MISC_MSR_LIST_MULTIPLIER;
|
|
}
|
|
|
|
/*
|
|
* Load guest's/host's msr at nested entry/exit.
|
|
* return 0 for success, entry index for failure.
|
|
*
|
|
* One of the failure modes for MSR load/store is when a list exceeds the
|
|
* virtual hardware's capacity. To maintain compatibility with hardware inasmuch
|
|
* as possible, process all valid entries before failing rather than precheck
|
|
* for a capacity violation.
|
|
*/
|
|
static u32 nested_vmx_load_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
|
|
{
|
|
u32 i;
|
|
struct vmx_msr_entry e;
|
|
u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
|
|
|
|
for (i = 0; i < count; i++) {
|
|
if (unlikely(i >= max_msr_list_size))
|
|
goto fail;
|
|
|
|
if (kvm_vcpu_read_guest(vcpu, gpa + i * sizeof(e),
|
|
&e, sizeof(e))) {
|
|
pr_debug_ratelimited(
|
|
"%s cannot read MSR entry (%u, 0x%08llx)\n",
|
|
__func__, i, gpa + i * sizeof(e));
|
|
goto fail;
|
|
}
|
|
if (nested_vmx_load_msr_check(vcpu, &e)) {
|
|
pr_debug_ratelimited(
|
|
"%s check failed (%u, 0x%x, 0x%x)\n",
|
|
__func__, i, e.index, e.reserved);
|
|
goto fail;
|
|
}
|
|
if (kvm_set_msr(vcpu, e.index, e.value)) {
|
|
pr_debug_ratelimited(
|
|
"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
|
|
__func__, i, e.index, e.value);
|
|
goto fail;
|
|
}
|
|
}
|
|
return 0;
|
|
fail:
|
|
/* Note, max_msr_list_size is at most 4096, i.e. this can't wrap. */
|
|
return i + 1;
|
|
}
|
|
|
|
static bool nested_vmx_get_vmexit_msr_value(struct kvm_vcpu *vcpu,
|
|
u32 msr_index,
|
|
u64 *data)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
/*
|
|
* If the L0 hypervisor stored a more accurate value for the TSC that
|
|
* does not include the time taken for emulation of the L2->L1
|
|
* VM-exit in L0, use the more accurate value.
|
|
*/
|
|
if (msr_index == MSR_IA32_TSC) {
|
|
int i = vmx_find_loadstore_msr_slot(&vmx->msr_autostore.guest,
|
|
MSR_IA32_TSC);
|
|
|
|
if (i >= 0) {
|
|
u64 val = vmx->msr_autostore.guest.val[i].value;
|
|
|
|
*data = kvm_read_l1_tsc(vcpu, val);
|
|
return true;
|
|
}
|
|
}
|
|
|
|
if (kvm_get_msr(vcpu, msr_index, data)) {
|
|
pr_debug_ratelimited("%s cannot read MSR (0x%x)\n", __func__,
|
|
msr_index);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
static bool read_and_check_msr_entry(struct kvm_vcpu *vcpu, u64 gpa, int i,
|
|
struct vmx_msr_entry *e)
|
|
{
|
|
if (kvm_vcpu_read_guest(vcpu,
|
|
gpa + i * sizeof(*e),
|
|
e, 2 * sizeof(u32))) {
|
|
pr_debug_ratelimited(
|
|
"%s cannot read MSR entry (%u, 0x%08llx)\n",
|
|
__func__, i, gpa + i * sizeof(*e));
|
|
return false;
|
|
}
|
|
if (nested_vmx_store_msr_check(vcpu, e)) {
|
|
pr_debug_ratelimited(
|
|
"%s check failed (%u, 0x%x, 0x%x)\n",
|
|
__func__, i, e->index, e->reserved);
|
|
return false;
|
|
}
|
|
return true;
|
|
}
|
|
|
|
/*
 * Store MSR values into the @count-entry MSR-store list at guest physical
 * address @gpa.  For each entry the MSR index is read from the guest, the
 * value is obtained via nested_vmx_get_vmexit_msr_value() and written into
 * the entry's value field.
 *
 * Returns 0 on success, -EINVAL if the list is longer than the supported
 * maximum or any entry cannot be read, validated, fetched or written.
 */
static int nested_vmx_store_msr(struct kvm_vcpu *vcpu, u64 gpa, u32 count)
{
	u32 max_msr_list_size = nested_vmx_max_atomic_switch_msrs(vcpu);
	struct vmx_msr_entry e;
	u64 data;
	u32 i;

	for (i = 0; i < count; i++) {
		u64 value_gpa;

		if (unlikely(i >= max_msr_list_size))
			return -EINVAL;

		if (!read_and_check_msr_entry(vcpu, gpa, i, &e))
			return -EINVAL;

		if (!nested_vmx_get_vmexit_msr_value(vcpu, e.index, &data))
			return -EINVAL;

		value_gpa = gpa + i * sizeof(e) +
			    offsetof(struct vmx_msr_entry, value);
		if (!kvm_vcpu_write_guest(vcpu, value_gpa, &data, sizeof(data)))
			continue;

		pr_debug_ratelimited(
			"%s cannot write MSR (%u, 0x%x, 0x%llx)\n",
			__func__, i, e.index, data);
		return -EINVAL;
	}

	return 0;
}
|
|
|
|
/*
 * Report whether vmcs12's VM-exit MSR-store list contains @msr_index.  The
 * scan stops, answering false, at the first entry that cannot be read or
 * fails validation.
 */
static bool nested_msr_store_list_has_msr(struct kvm_vcpu *vcpu, u32 msr_index)
{
	struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
	u64 list_gpa = vmcs12->vm_exit_msr_store_addr;
	u32 nr_entries = vmcs12->vm_exit_msr_store_count;
	struct vmx_msr_entry entry;
	u32 idx = 0;

	while (idx < nr_entries) {
		if (!read_and_check_msr_entry(vcpu, list_gpa, idx, &entry))
			return false;

		if (entry.index == msr_index)
			return true;

		idx++;
	}

	return false;
}
|
|
|
|
static void prepare_vmx_msr_autostore_list(struct kvm_vcpu *vcpu,
|
|
u32 msr_index)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmx_msrs *autostore = &vmx->msr_autostore.guest;
|
|
bool in_vmcs12_store_list;
|
|
int msr_autostore_slot;
|
|
bool in_autostore_list;
|
|
int last;
|
|
|
|
msr_autostore_slot = vmx_find_loadstore_msr_slot(autostore, msr_index);
|
|
in_autostore_list = msr_autostore_slot >= 0;
|
|
in_vmcs12_store_list = nested_msr_store_list_has_msr(vcpu, msr_index);
|
|
|
|
if (in_vmcs12_store_list && !in_autostore_list) {
|
|
if (autostore->nr == MAX_NR_LOADSTORE_MSRS) {
|
|
/*
|
|
* Emulated VMEntry does not fail here. Instead a less
|
|
* accurate value will be returned by
|
|
* nested_vmx_get_vmexit_msr_value() using kvm_get_msr()
|
|
* instead of reading the value from the vmcs02 VMExit
|
|
* MSR-store area.
|
|
*/
|
|
pr_warn_ratelimited(
|
|
"Not enough msr entries in msr_autostore. Can't add msr %x\n",
|
|
msr_index);
|
|
return;
|
|
}
|
|
last = autostore->nr++;
|
|
autostore->val[last].index = msr_index;
|
|
} else if (!in_vmcs12_store_list && in_autostore_list) {
|
|
last = --autostore->nr;
|
|
autostore->val[msr_autostore_slot] = autostore->val[last];
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Load guest's/host's cr3 at nested entry/exit. @nested_ept is true if we are
|
|
* emulating VM-Entry into a guest with EPT enabled. On failure, the expected
|
|
* Exit Qualification (for a VM-Entry consistency check VM-Exit) is assigned to
|
|
* @entry_failure_code.
|
|
*/
|
|
static int nested_vmx_load_cr3(struct kvm_vcpu *vcpu, unsigned long cr3,
|
|
bool nested_ept, bool reload_pdptrs,
|
|
enum vm_entry_failure_code *entry_failure_code)
|
|
{
|
|
if (CC(kvm_vcpu_is_illegal_gpa(vcpu, cr3))) {
|
|
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
|
return -EINVAL;
|
|
}
|
|
|
|
/*
|
|
* If PAE paging and EPT are both on, CR3 is not used by the CPU and
|
|
* must not be dereferenced.
|
|
*/
|
|
if (reload_pdptrs && !nested_ept && is_pae_paging(vcpu) &&
|
|
CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, cr3))) {
|
|
*entry_failure_code = ENTRY_FAIL_PDPTE;
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (!nested_ept)
|
|
kvm_mmu_new_pgd(vcpu, cr3);
|
|
|
|
vcpu->arch.cr3 = cr3;
|
|
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
|
|
|
|
/* Re-initialize the MMU, e.g. to pick up CR4 MMU role changes. */
|
|
kvm_init_mmu(vcpu);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Returns if KVM is able to config CPU to tag TLB entries
|
|
* populated by L2 differently than TLB entries populated
|
|
* by L1.
|
|
*
|
|
* If L0 uses EPT, L1 and L2 run with different EPTP because
|
|
* guest_mode is part of kvm_mmu_page_role. Thus, TLB entries
|
|
* are tagged with different EPTP.
|
|
*
|
|
* If L1 uses VPID and we allocated a vpid02, TLB entries are tagged
|
|
* with different VPID (L1 entries are tagged with vmx->vpid
|
|
* while L2 entries are tagged with vmx->nested.vpid02).
|
|
*/
|
|
static bool nested_has_guest_tlb_tag(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
|
return enable_ept ||
|
|
(nested_cpu_has_vpid(vmcs12) && to_vmx(vcpu)->nested.vpid02);
|
|
}
|
|
|
|
static void nested_vmx_transition_tlb_flush(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12,
|
|
bool is_vmenter)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
/*
|
|
* If vmcs12 doesn't use VPID, L1 expects linear and combined mappings
|
|
* for *all* contexts to be flushed on VM-Enter/VM-Exit, i.e. it's a
|
|
* full TLB flush from the guest's perspective. This is required even
|
|
* if VPID is disabled in the host as KVM may need to synchronize the
|
|
* MMU in response to the guest TLB flush.
|
|
*
|
|
* Note, using TLB_FLUSH_GUEST is correct even if nested EPT is in use.
|
|
* EPT is a special snowflake, as guest-physical mappings aren't
|
|
* flushed on VPID invalidations, including VM-Enter or VM-Exit with
|
|
* VPID disabled. As a result, KVM _never_ needs to sync nEPT
|
|
* entries on VM-Enter because L1 can't rely on VM-Enter to flush
|
|
* those mappings.
|
|
*/
|
|
if (!nested_cpu_has_vpid(vmcs12)) {
|
|
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
|
return;
|
|
}
|
|
|
|
/* L2 should never have a VPID if VPID is disabled. */
|
|
WARN_ON(!enable_vpid);
|
|
|
|
/*
|
|
* VPID is enabled and in use by vmcs12. If vpid12 is changing, then
|
|
* emulate a guest TLB flush as KVM does not track vpid12 history nor
|
|
* is the VPID incorporated into the MMU context. I.e. KVM must assume
|
|
* that the new vpid12 has never been used and thus represents a new
|
|
* guest ASID that cannot have entries in the TLB.
|
|
*/
|
|
if (is_vmenter && vmcs12->virtual_processor_id != vmx->nested.last_vpid) {
|
|
vmx->nested.last_vpid = vmcs12->virtual_processor_id;
|
|
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* If VPID is enabled, used by vmc12, and vpid12 is not changing but
|
|
* does not have a unique TLB tag (ASID), i.e. EPT is disabled and
|
|
* KVM was unable to allocate a VPID for L2, flush the current context
|
|
* as the effective ASID is common to both L1 and L2.
|
|
*/
|
|
if (!nested_has_guest_tlb_tag(vcpu))
|
|
kvm_make_request(KVM_REQ_TLB_FLUSH_CURRENT, vcpu);
|
|
}
|
|
|
|
/*
 * Returns true if, considering only the bits selected by @mask, every bit
 * set in @subset is also set in @superset.
 */
static bool is_bitwise_subset(u64 superset, u64 subset, u64 mask)
{
	/* Masked bits that @subset has set but @superset does not. */
	u64 missing = subset & ~superset & mask;

	return !missing;
}
|
|
|
|
/*
 * Validate a userspace-provided value for the VMX_BASIC capability MSR
 * against vmcs_config.nested.basic and, if acceptable, store it in the
 * vCPU's nested MSR state.  Returns 0 on success, -EINVAL otherwise.
 */
static int vmx_restore_vmx_basic(struct vcpu_vmx *vmx, u64 data)
{
	/* Feature bits; bit 48 is excluded and checked separately below. */
	const u64 feature_bits = BIT_ULL(49) | BIT_ULL(54) | BIT_ULL(55);
	const u64 reserved_bits = BIT_ULL(31) | GENMASK_ULL(47, 45) |
				  GENMASK_ULL(63, 56);
	u64 supported = vmcs_config.nested.basic;

	/* Userspace may not set feature/reserved bits KVM does not report. */
	if (!is_bitwise_subset(supported, data, feature_bits | reserved_bits))
		return -EINVAL;

	/*
	 * KVM does not emulate a version of VMX that constrains physical
	 * addresses of VMX structures (e.g. VMCS) to 32-bits.
	 */
	if (data & BIT_ULL(48))
		return -EINVAL;

	/* The VMCS revision identifier must match exactly. */
	if (vmx_basic_vmcs_revision_id(supported) !=
	    vmx_basic_vmcs_revision_id(data))
		return -EINVAL;

	/* The restored VMCS size may not be smaller than KVM's. */
	if (vmx_basic_vmcs_size(data) < vmx_basic_vmcs_size(supported))
		return -EINVAL;

	vmx->nested.msrs.basic = data;
	return 0;
}
|
|
|
|
/*
 * Point *@low and *@high at the two 32-bit halves in @msrs that back the
 * control capability MSR @msr_index.  Any other MSR index is a BUG().
 */
static void vmx_get_control_msr(struct nested_vmx_msrs *msrs, u32 msr_index,
				u32 **low, u32 **high)
{
	if (msr_index == MSR_IA32_VMX_TRUE_PINBASED_CTLS) {
		*low = &msrs->pinbased_ctls_low;
		*high = &msrs->pinbased_ctls_high;
	} else if (msr_index == MSR_IA32_VMX_TRUE_PROCBASED_CTLS) {
		*low = &msrs->procbased_ctls_low;
		*high = &msrs->procbased_ctls_high;
	} else if (msr_index == MSR_IA32_VMX_TRUE_EXIT_CTLS) {
		*low = &msrs->exit_ctls_low;
		*high = &msrs->exit_ctls_high;
	} else if (msr_index == MSR_IA32_VMX_TRUE_ENTRY_CTLS) {
		*low = &msrs->entry_ctls_low;
		*high = &msrs->entry_ctls_high;
	} else if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS2) {
		*low = &msrs->secondary_ctls_low;
		*high = &msrs->secondary_ctls_high;
	} else {
		BUG();
	}
}
|
|
|
|
static int
|
|
vmx_restore_control_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
|
|
{
|
|
u32 *lowp, *highp;
|
|
u64 supported;
|
|
|
|
vmx_get_control_msr(&vmcs_config.nested, msr_index, &lowp, &highp);
|
|
|
|
supported = vmx_control_msr(*lowp, *highp);
|
|
|
|
/* Check must-be-1 bits are still 1. */
|
|
if (!is_bitwise_subset(data, supported, GENMASK_ULL(31, 0)))
|
|
return -EINVAL;
|
|
|
|
/* Check must-be-0 bits are still 0. */
|
|
if (!is_bitwise_subset(supported, data, GENMASK_ULL(63, 32)))
|
|
return -EINVAL;
|
|
|
|
vmx_get_control_msr(&vmx->nested.msrs, msr_index, &lowp, &highp);
|
|
*lowp = data;
|
|
*highp = data >> 32;
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Validate a userspace-provided value for the VMX_MISC capability MSR
 * against vmcs_config.nested and, if acceptable, store it in the vCPU's
 * nested MSR state.  Returns 0 on success, -EINVAL otherwise.
 */
static int vmx_restore_vmx_misc(struct vcpu_vmx *vmx, u64 data)
{
	const u64 feature_bits = BIT_ULL(5) | GENMASK_ULL(8, 6) |
				 BIT_ULL(14) | BIT_ULL(15) |
				 BIT_ULL(28) | BIT_ULL(29) | BIT_ULL(30);
	const u64 reserved_bits = GENMASK_ULL(13, 9) | BIT_ULL(31);
	u64 supported = vmx_control_msr(vmcs_config.nested.misc_low,
					vmcs_config.nested.misc_high);

	/* Userspace may not set feature/reserved bits KVM does not report. */
	if (!is_bitwise_subset(supported, data, feature_bits | reserved_bits))
		return -EINVAL;

	/*
	 * The preemption timer rate must match, but only if the vCPU's
	 * pin-based controls allow the preemption timer at all.
	 */
	if (vmx->nested.msrs.pinbased_ctls_high &
	    PIN_BASED_VMX_PREEMPTION_TIMER) {
		if (vmx_misc_preemption_timer_rate(data) !=
		    vmx_misc_preemption_timer_rate(supported))
			return -EINVAL;
	}

	/* These counts may be lowered but never raised. */
	if (vmx_misc_cr3_count(data) > vmx_misc_cr3_count(supported))
		return -EINVAL;

	if (vmx_misc_max_msr(data) > vmx_misc_max_msr(supported))
		return -EINVAL;

	if (vmx_misc_mseg_revid(data) != vmx_misc_mseg_revid(supported))
		return -EINVAL;

	vmx->nested.msrs.misc_low = data;
	vmx->nested.msrs.misc_high = data >> 32;

	return 0;
}
|
|
|
|
/*
 * Validate a userspace-provided value for the EPT/VPID capability MSR and,
 * if acceptable, store it in the vCPU's nested MSR state: the low half in
 * ept_caps, the high half in vpid_caps.  Returns 0 on success, -EINVAL
 * otherwise.
 */
static int vmx_restore_vmx_ept_vpid_cap(struct vcpu_vmx *vmx, u64 data)
{
	u64 supported = vmx_control_msr(vmcs_config.nested.ept_caps,
					vmcs_config.nested.vpid_caps);

	/*
	 * Every bit is either reserved or a feature bit, so the whole value
	 * must be a subset of what KVM reports.
	 */
	if (!is_bitwise_subset(supported, data, -1ULL))
		return -EINVAL;

	vmx->nested.msrs.ept_caps = data;
	vmx->nested.msrs.vpid_caps = data >> 32;

	return 0;
}
|
|
|
|
/*
 * Return a pointer to the field in @msrs backing the CR0/CR4 FIXED0 MSR
 * @msr_index.  Any other MSR index is a BUG().
 */
static u64 *vmx_get_fixed0_msr(struct nested_vmx_msrs *msrs, u32 msr_index)
{
	if (msr_index == MSR_IA32_VMX_CR0_FIXED0)
		return &msrs->cr0_fixed0;

	if (msr_index == MSR_IA32_VMX_CR4_FIXED0)
		return &msrs->cr4_fixed0;

	BUG();
}
|
|
|
|
/*
 * Validate a userspace-provided value for the CR0/CR4 FIXED0 MSR @msr_index
 * and, if acceptable, store it in the vCPU's nested MSR state.  Returns 0 on
 * success, -EINVAL otherwise.
 */
static int vmx_restore_fixed0_msr(struct vcpu_vmx *vmx, u32 msr_index, u64 data)
{
	const u64 *must_be_one = vmx_get_fixed0_msr(&vmcs_config.nested,
						    msr_index);
	u64 *target;

	/*
	 * A 1 bit marks a bit that "must-be-1" during VMX operation; each
	 * such bit must also be 1 in the restored value.
	 */
	if (!is_bitwise_subset(data, *must_be_one, -1ULL))
		return -EINVAL;

	target = vmx_get_fixed0_msr(&vmx->nested.msrs, msr_index);
	*target = data;

	return 0;
}
|
|
|
|
/*
|
|
* Called when userspace is restoring VMX MSRs.
|
|
*
|
|
* Returns 0 on success, non-0 otherwise.
|
|
*/
|
|
int vmx_set_vmx_msr(struct kvm_vcpu *vcpu, u32 msr_index, u64 data)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
/*
|
|
* Don't allow changes to the VMX capability MSRs while the vCPU
|
|
* is in VMX operation.
|
|
*/
|
|
if (vmx->nested.vmxon)
|
|
return -EBUSY;
|
|
|
|
switch (msr_index) {
|
|
case MSR_IA32_VMX_BASIC:
|
|
return vmx_restore_vmx_basic(vmx, data);
|
|
case MSR_IA32_VMX_PINBASED_CTLS:
|
|
case MSR_IA32_VMX_PROCBASED_CTLS:
|
|
case MSR_IA32_VMX_EXIT_CTLS:
|
|
case MSR_IA32_VMX_ENTRY_CTLS:
|
|
/*
|
|
* The "non-true" VMX capability MSRs are generated from the
|
|
* "true" MSRs, so we do not support restoring them directly.
|
|
*
|
|
* If userspace wants to emulate VMX_BASIC[55]=0, userspace
|
|
* should restore the "true" MSRs with the must-be-1 bits
|
|
* set according to the SDM Vol 3. A.2 "RESERVED CONTROLS AND
|
|
* DEFAULT SETTINGS".
|
|
*/
|
|
return -EINVAL;
|
|
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
|
|
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
|
|
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
|
|
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
|
|
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
|
return vmx_restore_control_msr(vmx, msr_index, data);
|
|
case MSR_IA32_VMX_MISC:
|
|
return vmx_restore_vmx_misc(vmx, data);
|
|
case MSR_IA32_VMX_CR0_FIXED0:
|
|
case MSR_IA32_VMX_CR4_FIXED0:
|
|
return vmx_restore_fixed0_msr(vmx, msr_index, data);
|
|
case MSR_IA32_VMX_CR0_FIXED1:
|
|
case MSR_IA32_VMX_CR4_FIXED1:
|
|
/*
|
|
* These MSRs are generated based on the vCPU's CPUID, so we
|
|
* do not support restoring them directly.
|
|
*/
|
|
return -EINVAL;
|
|
case MSR_IA32_VMX_EPT_VPID_CAP:
|
|
return vmx_restore_vmx_ept_vpid_cap(vmx, data);
|
|
case MSR_IA32_VMX_VMCS_ENUM:
|
|
vmx->nested.msrs.vmcs_enum = data;
|
|
return 0;
|
|
case MSR_IA32_VMX_VMFUNC:
|
|
if (data & ~vmcs_config.nested.vmfunc_controls)
|
|
return -EINVAL;
|
|
vmx->nested.msrs.vmfunc_controls = data;
|
|
return 0;
|
|
default:
|
|
/*
|
|
* The rest of the VMX capability MSRs do not support restore.
|
|
*/
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
/* Returns 0 on success, non-0 otherwise. */
|
|
int vmx_get_vmx_msr(struct nested_vmx_msrs *msrs, u32 msr_index, u64 *pdata)
|
|
{
|
|
switch (msr_index) {
|
|
case MSR_IA32_VMX_BASIC:
|
|
*pdata = msrs->basic;
|
|
break;
|
|
case MSR_IA32_VMX_TRUE_PINBASED_CTLS:
|
|
case MSR_IA32_VMX_PINBASED_CTLS:
|
|
*pdata = vmx_control_msr(
|
|
msrs->pinbased_ctls_low,
|
|
msrs->pinbased_ctls_high);
|
|
if (msr_index == MSR_IA32_VMX_PINBASED_CTLS)
|
|
*pdata |= PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
break;
|
|
case MSR_IA32_VMX_TRUE_PROCBASED_CTLS:
|
|
case MSR_IA32_VMX_PROCBASED_CTLS:
|
|
*pdata = vmx_control_msr(
|
|
msrs->procbased_ctls_low,
|
|
msrs->procbased_ctls_high);
|
|
if (msr_index == MSR_IA32_VMX_PROCBASED_CTLS)
|
|
*pdata |= CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
break;
|
|
case MSR_IA32_VMX_TRUE_EXIT_CTLS:
|
|
case MSR_IA32_VMX_EXIT_CTLS:
|
|
*pdata = vmx_control_msr(
|
|
msrs->exit_ctls_low,
|
|
msrs->exit_ctls_high);
|
|
if (msr_index == MSR_IA32_VMX_EXIT_CTLS)
|
|
*pdata |= VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
break;
|
|
case MSR_IA32_VMX_TRUE_ENTRY_CTLS:
|
|
case MSR_IA32_VMX_ENTRY_CTLS:
|
|
*pdata = vmx_control_msr(
|
|
msrs->entry_ctls_low,
|
|
msrs->entry_ctls_high);
|
|
if (msr_index == MSR_IA32_VMX_ENTRY_CTLS)
|
|
*pdata |= VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
break;
|
|
case MSR_IA32_VMX_MISC:
|
|
*pdata = vmx_control_msr(
|
|
msrs->misc_low,
|
|
msrs->misc_high);
|
|
break;
|
|
case MSR_IA32_VMX_CR0_FIXED0:
|
|
*pdata = msrs->cr0_fixed0;
|
|
break;
|
|
case MSR_IA32_VMX_CR0_FIXED1:
|
|
*pdata = msrs->cr0_fixed1;
|
|
break;
|
|
case MSR_IA32_VMX_CR4_FIXED0:
|
|
*pdata = msrs->cr4_fixed0;
|
|
break;
|
|
case MSR_IA32_VMX_CR4_FIXED1:
|
|
*pdata = msrs->cr4_fixed1;
|
|
break;
|
|
case MSR_IA32_VMX_VMCS_ENUM:
|
|
*pdata = msrs->vmcs_enum;
|
|
break;
|
|
case MSR_IA32_VMX_PROCBASED_CTLS2:
|
|
*pdata = vmx_control_msr(
|
|
msrs->secondary_ctls_low,
|
|
msrs->secondary_ctls_high);
|
|
break;
|
|
case MSR_IA32_VMX_EPT_VPID_CAP:
|
|
*pdata = msrs->ept_caps |
|
|
((u64)msrs->vpid_caps << 32);
|
|
break;
|
|
case MSR_IA32_VMX_VMFUNC:
|
|
*pdata = msrs->vmfunc_controls;
|
|
break;
|
|
default:
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Copy the writable VMCS shadow fields back to the VMCS12, in case they have
|
|
* been modified by the L1 guest. Note, "writable" in this context means
|
|
* "writable by the guest", i.e. tagged SHADOW_FIELD_RW; the set of
|
|
* fields tagged SHADOW_FIELD_RO may or may not align with the "read-only"
|
|
* VM-exit information fields (which are actually writable if the vCPU is
|
|
* configured to support "VMWRITE to any supported field in the VMCS").
|
|
*/
|
|
static void copy_shadow_to_vmcs12(struct vcpu_vmx *vmx)
|
|
{
|
|
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
|
|
struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
|
|
struct shadow_vmcs_field field;
|
|
unsigned long val;
|
|
int i;
|
|
|
|
if (WARN_ON(!shadow_vmcs))
|
|
return;
|
|
|
|
preempt_disable();
|
|
|
|
vmcs_load(shadow_vmcs);
|
|
|
|
for (i = 0; i < max_shadow_read_write_fields; i++) {
|
|
field = shadow_read_write_fields[i];
|
|
val = __vmcs_readl(field.encoding);
|
|
vmcs12_write_any(vmcs12, field.encoding, field.offset, val);
|
|
}
|
|
|
|
vmcs_clear(shadow_vmcs);
|
|
vmcs_load(vmx->loaded_vmcs->vmcs);
|
|
|
|
preempt_enable();
|
|
}
|
|
|
|
static void copy_vmcs12_to_shadow(struct vcpu_vmx *vmx)
|
|
{
|
|
const struct shadow_vmcs_field *fields[] = {
|
|
shadow_read_write_fields,
|
|
shadow_read_only_fields
|
|
};
|
|
const int max_fields[] = {
|
|
max_shadow_read_write_fields,
|
|
max_shadow_read_only_fields
|
|
};
|
|
struct vmcs *shadow_vmcs = vmx->vmcs01.shadow_vmcs;
|
|
struct vmcs12 *vmcs12 = get_vmcs12(&vmx->vcpu);
|
|
struct shadow_vmcs_field field;
|
|
unsigned long val;
|
|
int i, q;
|
|
|
|
if (WARN_ON(!shadow_vmcs))
|
|
return;
|
|
|
|
vmcs_load(shadow_vmcs);
|
|
|
|
for (q = 0; q < ARRAY_SIZE(fields); q++) {
|
|
for (i = 0; i < max_fields[q]; i++) {
|
|
field = fields[q][i];
|
|
val = vmcs12_read_any(vmcs12, field.encoding,
|
|
field.offset);
|
|
__vmcs_writel(field.encoding, val);
|
|
}
|
|
}
|
|
|
|
vmcs_clear(shadow_vmcs);
|
|
vmcs_load(vmx->loaded_vmcs->vmcs);
|
|
}
|
|
|
|
/*
 * True when the clean-field group @group is not flagged in @clean_fields,
 * i.e. the eVMCS fields belonging to that group have to be copied again.
 */
#define EVMCS_GROUP_DIRTY(clean_fields, group) \
	unlikely(!((clean_fields) & (group)))

/*
 * Copy fields from the enlightened VMCS into the cached VMCS12.  The fields
 * of a clean-field group are copied only when that group's bit is clear in
 * @hv_clean_fields; the fields belonging to no group are always copied.
 */
static void copy_enlightened_to_vmcs12(struct vcpu_vmx *vmx, u32 hv_clean_fields)
{
	struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
	struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;

	/* HV_VMX_ENLIGHTENED_CLEAN_FIELD_NONE: always copied. */
	vmcs12->tpr_threshold = evmcs->tpr_threshold;
	vmcs12->guest_rip = evmcs->guest_rip;

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_BASIC)) {
		vmcs12->guest_rsp = evmcs->guest_rsp;
		vmcs12->guest_rflags = evmcs->guest_rflags;
		vmcs12->guest_interruptibility_info =
			evmcs->guest_interruptibility_info;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_PROC)) {
		vmcs12->cpu_based_vm_exec_control =
			evmcs->cpu_based_vm_exec_control;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EXCPN)) {
		vmcs12->exception_bitmap = evmcs->exception_bitmap;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_ENTRY)) {
		vmcs12->vm_entry_controls = evmcs->vm_entry_controls;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_EVENT)) {
		vmcs12->vm_entry_intr_info_field =
			evmcs->vm_entry_intr_info_field;
		vmcs12->vm_entry_exception_error_code =
			evmcs->vm_entry_exception_error_code;
		vmcs12->vm_entry_instruction_len =
			evmcs->vm_entry_instruction_len;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_GRP1)) {
		vmcs12->host_ia32_pat = evmcs->host_ia32_pat;
		vmcs12->host_ia32_efer = evmcs->host_ia32_efer;
		vmcs12->host_cr0 = evmcs->host_cr0;
		vmcs12->host_cr3 = evmcs->host_cr3;
		vmcs12->host_cr4 = evmcs->host_cr4;
		vmcs12->host_ia32_sysenter_esp = evmcs->host_ia32_sysenter_esp;
		vmcs12->host_ia32_sysenter_eip = evmcs->host_ia32_sysenter_eip;
		vmcs12->host_rip = evmcs->host_rip;
		vmcs12->host_ia32_sysenter_cs = evmcs->host_ia32_sysenter_cs;
		vmcs12->host_es_selector = evmcs->host_es_selector;
		vmcs12->host_cs_selector = evmcs->host_cs_selector;
		vmcs12->host_ss_selector = evmcs->host_ss_selector;
		vmcs12->host_ds_selector = evmcs->host_ds_selector;
		vmcs12->host_fs_selector = evmcs->host_fs_selector;
		vmcs12->host_gs_selector = evmcs->host_gs_selector;
		vmcs12->host_tr_selector = evmcs->host_tr_selector;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP1)) {
		vmcs12->pin_based_vm_exec_control =
			evmcs->pin_based_vm_exec_control;
		vmcs12->vm_exit_controls = evmcs->vm_exit_controls;
		vmcs12->secondary_vm_exec_control =
			evmcs->secondary_vm_exec_control;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_IO_BITMAP)) {
		vmcs12->io_bitmap_a = evmcs->io_bitmap_a;
		vmcs12->io_bitmap_b = evmcs->io_bitmap_b;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_MSR_BITMAP)) {
		vmcs12->msr_bitmap = evmcs->msr_bitmap;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
		/* Segment and descriptor-table bases. */
		vmcs12->guest_es_base = evmcs->guest_es_base;
		vmcs12->guest_cs_base = evmcs->guest_cs_base;
		vmcs12->guest_ss_base = evmcs->guest_ss_base;
		vmcs12->guest_ds_base = evmcs->guest_ds_base;
		vmcs12->guest_fs_base = evmcs->guest_fs_base;
		vmcs12->guest_gs_base = evmcs->guest_gs_base;
		vmcs12->guest_ldtr_base = evmcs->guest_ldtr_base;
		vmcs12->guest_tr_base = evmcs->guest_tr_base;
		vmcs12->guest_gdtr_base = evmcs->guest_gdtr_base;
		vmcs12->guest_idtr_base = evmcs->guest_idtr_base;

		/* Segment and descriptor-table limits. */
		vmcs12->guest_es_limit = evmcs->guest_es_limit;
		vmcs12->guest_cs_limit = evmcs->guest_cs_limit;
		vmcs12->guest_ss_limit = evmcs->guest_ss_limit;
		vmcs12->guest_ds_limit = evmcs->guest_ds_limit;
		vmcs12->guest_fs_limit = evmcs->guest_fs_limit;
		vmcs12->guest_gs_limit = evmcs->guest_gs_limit;
		vmcs12->guest_ldtr_limit = evmcs->guest_ldtr_limit;
		vmcs12->guest_tr_limit = evmcs->guest_tr_limit;
		vmcs12->guest_gdtr_limit = evmcs->guest_gdtr_limit;
		vmcs12->guest_idtr_limit = evmcs->guest_idtr_limit;

		/* Segment access rights. */
		vmcs12->guest_es_ar_bytes = evmcs->guest_es_ar_bytes;
		vmcs12->guest_cs_ar_bytes = evmcs->guest_cs_ar_bytes;
		vmcs12->guest_ss_ar_bytes = evmcs->guest_ss_ar_bytes;
		vmcs12->guest_ds_ar_bytes = evmcs->guest_ds_ar_bytes;
		vmcs12->guest_fs_ar_bytes = evmcs->guest_fs_ar_bytes;
		vmcs12->guest_gs_ar_bytes = evmcs->guest_gs_ar_bytes;
		vmcs12->guest_ldtr_ar_bytes = evmcs->guest_ldtr_ar_bytes;
		vmcs12->guest_tr_ar_bytes = evmcs->guest_tr_ar_bytes;

		/* Segment selectors. */
		vmcs12->guest_es_selector = evmcs->guest_es_selector;
		vmcs12->guest_cs_selector = evmcs->guest_cs_selector;
		vmcs12->guest_ss_selector = evmcs->guest_ss_selector;
		vmcs12->guest_ds_selector = evmcs->guest_ds_selector;
		vmcs12->guest_fs_selector = evmcs->guest_fs_selector;
		vmcs12->guest_gs_selector = evmcs->guest_gs_selector;
		vmcs12->guest_ldtr_selector = evmcs->guest_ldtr_selector;
		vmcs12->guest_tr_selector = evmcs->guest_tr_selector;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_GRP2)) {
		vmcs12->tsc_offset = evmcs->tsc_offset;
		vmcs12->virtual_apic_page_addr = evmcs->virtual_apic_page_addr;
		vmcs12->xss_exit_bitmap = evmcs->xss_exit_bitmap;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CRDR)) {
		vmcs12->cr0_guest_host_mask = evmcs->cr0_guest_host_mask;
		vmcs12->cr4_guest_host_mask = evmcs->cr4_guest_host_mask;
		vmcs12->cr0_read_shadow = evmcs->cr0_read_shadow;
		vmcs12->cr4_read_shadow = evmcs->cr4_read_shadow;
		vmcs12->guest_cr0 = evmcs->guest_cr0;
		vmcs12->guest_cr3 = evmcs->guest_cr3;
		vmcs12->guest_cr4 = evmcs->guest_cr4;
		vmcs12->guest_dr7 = evmcs->guest_dr7;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_HOST_POINTER)) {
		vmcs12->host_fs_base = evmcs->host_fs_base;
		vmcs12->host_gs_base = evmcs->host_gs_base;
		vmcs12->host_tr_base = evmcs->host_tr_base;
		vmcs12->host_gdtr_base = evmcs->host_gdtr_base;
		vmcs12->host_idtr_base = evmcs->host_idtr_base;
		vmcs12->host_rsp = evmcs->host_rsp;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_CONTROL_XLAT)) {
		vmcs12->ept_pointer = evmcs->ept_pointer;
		vmcs12->virtual_processor_id = evmcs->virtual_processor_id;
	}

	if (EVMCS_GROUP_DIRTY(hv_clean_fields,
			      HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
		vmcs12->vmcs_link_pointer = evmcs->vmcs_link_pointer;
		vmcs12->guest_ia32_debugctl = evmcs->guest_ia32_debugctl;
		vmcs12->guest_ia32_pat = evmcs->guest_ia32_pat;
		vmcs12->guest_ia32_efer = evmcs->guest_ia32_efer;
		vmcs12->guest_pdptr0 = evmcs->guest_pdptr0;
		vmcs12->guest_pdptr1 = evmcs->guest_pdptr1;
		vmcs12->guest_pdptr2 = evmcs->guest_pdptr2;
		vmcs12->guest_pdptr3 = evmcs->guest_pdptr3;
		vmcs12->guest_pending_dbg_exceptions =
			evmcs->guest_pending_dbg_exceptions;
		vmcs12->guest_sysenter_esp = evmcs->guest_sysenter_esp;
		vmcs12->guest_sysenter_eip = evmcs->guest_sysenter_eip;
		vmcs12->guest_bndcfgs = evmcs->guest_bndcfgs;
		vmcs12->guest_activity_state = evmcs->guest_activity_state;
		vmcs12->guest_sysenter_cs = evmcs->guest_sysenter_cs;
	}

	/*
	 * Fields intentionally not copied from the eVMCS.
	 *
	 * Not used?
	 *   vm_exit_msr_store_addr, vm_exit_msr_load_addr,
	 *   vm_entry_msr_load_addr, page_fault_error_code_mask,
	 *   page_fault_error_code_match, cr3_target_count,
	 *   vm_exit_msr_store_count, vm_exit_msr_load_count,
	 *   vm_entry_msr_load_count
	 *
	 * Read only fields:
	 *   guest_physical_address, vm_instruction_error, vm_exit_reason,
	 *   vm_exit_intr_info, vm_exit_intr_error_code,
	 *   idt_vectoring_info_field, idt_vectoring_error_code,
	 *   vm_exit_instruction_len, vmx_instruction_info,
	 *   exit_qualification, guest_linear_address
	 *
	 * Not present in struct vmcs12:
	 *   exit_io_instruction_ecx, exit_io_instruction_esi,
	 *   exit_io_instruction_edi, exit_io_instruction_eip
	 */
}

#undef EVMCS_GROUP_DIRTY
|
|
|
|
static void copy_vmcs12_to_enlightened(struct vcpu_vmx *vmx)
|
|
{
|
|
struct vmcs12 *vmcs12 = vmx->nested.cached_vmcs12;
|
|
struct hv_enlightened_vmcs *evmcs = vmx->nested.hv_evmcs;
|
|
|
|
/*
|
|
* Should not be changed by KVM:
|
|
*
|
|
* evmcs->host_es_selector = vmcs12->host_es_selector;
|
|
* evmcs->host_cs_selector = vmcs12->host_cs_selector;
|
|
* evmcs->host_ss_selector = vmcs12->host_ss_selector;
|
|
* evmcs->host_ds_selector = vmcs12->host_ds_selector;
|
|
* evmcs->host_fs_selector = vmcs12->host_fs_selector;
|
|
* evmcs->host_gs_selector = vmcs12->host_gs_selector;
|
|
* evmcs->host_tr_selector = vmcs12->host_tr_selector;
|
|
* evmcs->host_ia32_pat = vmcs12->host_ia32_pat;
|
|
* evmcs->host_ia32_efer = vmcs12->host_ia32_efer;
|
|
* evmcs->host_cr0 = vmcs12->host_cr0;
|
|
* evmcs->host_cr3 = vmcs12->host_cr3;
|
|
* evmcs->host_cr4 = vmcs12->host_cr4;
|
|
* evmcs->host_ia32_sysenter_esp = vmcs12->host_ia32_sysenter_esp;
|
|
* evmcs->host_ia32_sysenter_eip = vmcs12->host_ia32_sysenter_eip;
|
|
* evmcs->host_rip = vmcs12->host_rip;
|
|
* evmcs->host_ia32_sysenter_cs = vmcs12->host_ia32_sysenter_cs;
|
|
* evmcs->host_fs_base = vmcs12->host_fs_base;
|
|
* evmcs->host_gs_base = vmcs12->host_gs_base;
|
|
* evmcs->host_tr_base = vmcs12->host_tr_base;
|
|
* evmcs->host_gdtr_base = vmcs12->host_gdtr_base;
|
|
* evmcs->host_idtr_base = vmcs12->host_idtr_base;
|
|
* evmcs->host_rsp = vmcs12->host_rsp;
|
|
* sync_vmcs02_to_vmcs12() doesn't read these:
|
|
* evmcs->io_bitmap_a = vmcs12->io_bitmap_a;
|
|
* evmcs->io_bitmap_b = vmcs12->io_bitmap_b;
|
|
* evmcs->msr_bitmap = vmcs12->msr_bitmap;
|
|
* evmcs->ept_pointer = vmcs12->ept_pointer;
|
|
* evmcs->xss_exit_bitmap = vmcs12->xss_exit_bitmap;
|
|
* evmcs->vm_exit_msr_store_addr = vmcs12->vm_exit_msr_store_addr;
|
|
* evmcs->vm_exit_msr_load_addr = vmcs12->vm_exit_msr_load_addr;
|
|
* evmcs->vm_entry_msr_load_addr = vmcs12->vm_entry_msr_load_addr;
|
|
* evmcs->tpr_threshold = vmcs12->tpr_threshold;
|
|
* evmcs->virtual_processor_id = vmcs12->virtual_processor_id;
|
|
* evmcs->exception_bitmap = vmcs12->exception_bitmap;
|
|
* evmcs->vmcs_link_pointer = vmcs12->vmcs_link_pointer;
|
|
* evmcs->pin_based_vm_exec_control = vmcs12->pin_based_vm_exec_control;
|
|
* evmcs->vm_exit_controls = vmcs12->vm_exit_controls;
|
|
* evmcs->secondary_vm_exec_control = vmcs12->secondary_vm_exec_control;
|
|
* evmcs->page_fault_error_code_mask =
|
|
* vmcs12->page_fault_error_code_mask;
|
|
* evmcs->page_fault_error_code_match =
|
|
* vmcs12->page_fault_error_code_match;
|
|
* evmcs->cr3_target_count = vmcs12->cr3_target_count;
|
|
* evmcs->virtual_apic_page_addr = vmcs12->virtual_apic_page_addr;
|
|
* evmcs->tsc_offset = vmcs12->tsc_offset;
|
|
* evmcs->guest_ia32_debugctl = vmcs12->guest_ia32_debugctl;
|
|
* evmcs->cr0_guest_host_mask = vmcs12->cr0_guest_host_mask;
|
|
* evmcs->cr4_guest_host_mask = vmcs12->cr4_guest_host_mask;
|
|
* evmcs->cr0_read_shadow = vmcs12->cr0_read_shadow;
|
|
* evmcs->cr4_read_shadow = vmcs12->cr4_read_shadow;
|
|
* evmcs->vm_exit_msr_store_count = vmcs12->vm_exit_msr_store_count;
|
|
* evmcs->vm_exit_msr_load_count = vmcs12->vm_exit_msr_load_count;
|
|
* evmcs->vm_entry_msr_load_count = vmcs12->vm_entry_msr_load_count;
|
|
*
|
|
* Not present in struct vmcs12:
|
|
* evmcs->exit_io_instruction_ecx = vmcs12->exit_io_instruction_ecx;
|
|
* evmcs->exit_io_instruction_esi = vmcs12->exit_io_instruction_esi;
|
|
* evmcs->exit_io_instruction_edi = vmcs12->exit_io_instruction_edi;
|
|
* evmcs->exit_io_instruction_eip = vmcs12->exit_io_instruction_eip;
|
|
*/
|
|
|
|
evmcs->guest_es_selector = vmcs12->guest_es_selector;
|
|
evmcs->guest_cs_selector = vmcs12->guest_cs_selector;
|
|
evmcs->guest_ss_selector = vmcs12->guest_ss_selector;
|
|
evmcs->guest_ds_selector = vmcs12->guest_ds_selector;
|
|
evmcs->guest_fs_selector = vmcs12->guest_fs_selector;
|
|
evmcs->guest_gs_selector = vmcs12->guest_gs_selector;
|
|
evmcs->guest_ldtr_selector = vmcs12->guest_ldtr_selector;
|
|
evmcs->guest_tr_selector = vmcs12->guest_tr_selector;
|
|
|
|
evmcs->guest_es_limit = vmcs12->guest_es_limit;
|
|
evmcs->guest_cs_limit = vmcs12->guest_cs_limit;
|
|
evmcs->guest_ss_limit = vmcs12->guest_ss_limit;
|
|
evmcs->guest_ds_limit = vmcs12->guest_ds_limit;
|
|
evmcs->guest_fs_limit = vmcs12->guest_fs_limit;
|
|
evmcs->guest_gs_limit = vmcs12->guest_gs_limit;
|
|
evmcs->guest_ldtr_limit = vmcs12->guest_ldtr_limit;
|
|
evmcs->guest_tr_limit = vmcs12->guest_tr_limit;
|
|
evmcs->guest_gdtr_limit = vmcs12->guest_gdtr_limit;
|
|
evmcs->guest_idtr_limit = vmcs12->guest_idtr_limit;
|
|
|
|
evmcs->guest_es_ar_bytes = vmcs12->guest_es_ar_bytes;
|
|
evmcs->guest_cs_ar_bytes = vmcs12->guest_cs_ar_bytes;
|
|
evmcs->guest_ss_ar_bytes = vmcs12->guest_ss_ar_bytes;
|
|
evmcs->guest_ds_ar_bytes = vmcs12->guest_ds_ar_bytes;
|
|
evmcs->guest_fs_ar_bytes = vmcs12->guest_fs_ar_bytes;
|
|
evmcs->guest_gs_ar_bytes = vmcs12->guest_gs_ar_bytes;
|
|
evmcs->guest_ldtr_ar_bytes = vmcs12->guest_ldtr_ar_bytes;
|
|
evmcs->guest_tr_ar_bytes = vmcs12->guest_tr_ar_bytes;
|
|
|
|
evmcs->guest_es_base = vmcs12->guest_es_base;
|
|
evmcs->guest_cs_base = vmcs12->guest_cs_base;
|
|
evmcs->guest_ss_base = vmcs12->guest_ss_base;
|
|
evmcs->guest_ds_base = vmcs12->guest_ds_base;
|
|
evmcs->guest_fs_base = vmcs12->guest_fs_base;
|
|
evmcs->guest_gs_base = vmcs12->guest_gs_base;
|
|
evmcs->guest_ldtr_base = vmcs12->guest_ldtr_base;
|
|
evmcs->guest_tr_base = vmcs12->guest_tr_base;
|
|
evmcs->guest_gdtr_base = vmcs12->guest_gdtr_base;
|
|
evmcs->guest_idtr_base = vmcs12->guest_idtr_base;
|
|
|
|
evmcs->guest_ia32_pat = vmcs12->guest_ia32_pat;
|
|
evmcs->guest_ia32_efer = vmcs12->guest_ia32_efer;
|
|
|
|
evmcs->guest_pdptr0 = vmcs12->guest_pdptr0;
|
|
evmcs->guest_pdptr1 = vmcs12->guest_pdptr1;
|
|
evmcs->guest_pdptr2 = vmcs12->guest_pdptr2;
|
|
evmcs->guest_pdptr3 = vmcs12->guest_pdptr3;
|
|
|
|
evmcs->guest_pending_dbg_exceptions =
|
|
vmcs12->guest_pending_dbg_exceptions;
|
|
evmcs->guest_sysenter_esp = vmcs12->guest_sysenter_esp;
|
|
evmcs->guest_sysenter_eip = vmcs12->guest_sysenter_eip;
|
|
|
|
evmcs->guest_activity_state = vmcs12->guest_activity_state;
|
|
evmcs->guest_sysenter_cs = vmcs12->guest_sysenter_cs;
|
|
|
|
evmcs->guest_cr0 = vmcs12->guest_cr0;
|
|
evmcs->guest_cr3 = vmcs12->guest_cr3;
|
|
evmcs->guest_cr4 = vmcs12->guest_cr4;
|
|
evmcs->guest_dr7 = vmcs12->guest_dr7;
|
|
|
|
evmcs->guest_physical_address = vmcs12->guest_physical_address;
|
|
|
|
evmcs->vm_instruction_error = vmcs12->vm_instruction_error;
|
|
evmcs->vm_exit_reason = vmcs12->vm_exit_reason;
|
|
evmcs->vm_exit_intr_info = vmcs12->vm_exit_intr_info;
|
|
evmcs->vm_exit_intr_error_code = vmcs12->vm_exit_intr_error_code;
|
|
evmcs->idt_vectoring_info_field = vmcs12->idt_vectoring_info_field;
|
|
evmcs->idt_vectoring_error_code = vmcs12->idt_vectoring_error_code;
|
|
evmcs->vm_exit_instruction_len = vmcs12->vm_exit_instruction_len;
|
|
evmcs->vmx_instruction_info = vmcs12->vmx_instruction_info;
|
|
|
|
evmcs->exit_qualification = vmcs12->exit_qualification;
|
|
|
|
evmcs->guest_linear_address = vmcs12->guest_linear_address;
|
|
evmcs->guest_rsp = vmcs12->guest_rsp;
|
|
evmcs->guest_rflags = vmcs12->guest_rflags;
|
|
|
|
evmcs->guest_interruptibility_info =
|
|
vmcs12->guest_interruptibility_info;
|
|
evmcs->cpu_based_vm_exec_control = vmcs12->cpu_based_vm_exec_control;
|
|
evmcs->vm_entry_controls = vmcs12->vm_entry_controls;
|
|
evmcs->vm_entry_intr_info_field = vmcs12->vm_entry_intr_info_field;
|
|
evmcs->vm_entry_exception_error_code =
|
|
vmcs12->vm_entry_exception_error_code;
|
|
evmcs->vm_entry_instruction_len = vmcs12->vm_entry_instruction_len;
|
|
|
|
evmcs->guest_rip = vmcs12->guest_rip;
|
|
|
|
evmcs->guest_bndcfgs = vmcs12->guest_bndcfgs;
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* This is an equivalent of the nested hypervisor executing the vmptrld
|
|
* instruction.
|
|
*/
|
|
static enum nested_evmptrld_status nested_vmx_handle_enlightened_vmptrld(
|
|
struct kvm_vcpu *vcpu, bool from_launch)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
bool evmcs_gpa_changed = false;
|
|
u64 evmcs_gpa;
|
|
|
|
if (likely(!vmx->nested.enlightened_vmcs_enabled))
|
|
return EVMPTRLD_DISABLED;
|
|
|
|
if (!nested_enlightened_vmentry(vcpu, &evmcs_gpa)) {
|
|
nested_release_evmcs(vcpu);
|
|
return EVMPTRLD_DISABLED;
|
|
}
|
|
|
|
if (unlikely(evmcs_gpa != vmx->nested.hv_evmcs_vmptr)) {
|
|
vmx->nested.current_vmptr = -1ull;
|
|
|
|
nested_release_evmcs(vcpu);
|
|
|
|
if (kvm_vcpu_map(vcpu, gpa_to_gfn(evmcs_gpa),
|
|
&vmx->nested.hv_evmcs_map))
|
|
return EVMPTRLD_ERROR;
|
|
|
|
vmx->nested.hv_evmcs = vmx->nested.hv_evmcs_map.hva;
|
|
|
|
/*
|
|
* Currently, KVM only supports eVMCS version 1
|
|
* (== KVM_EVMCS_VERSION) and thus we expect guest to set this
|
|
* value to first u32 field of eVMCS which should specify eVMCS
|
|
* VersionNumber.
|
|
*
|
|
* Guest should be aware of supported eVMCS versions by host by
|
|
* examining CPUID.0x4000000A.EAX[0:15]. Host userspace VMM is
|
|
* expected to set this CPUID leaf according to the value
|
|
* returned in vmcs_version from nested_enable_evmcs().
|
|
*
|
|
* However, it turns out that Microsoft Hyper-V fails to comply
|
|
* to their own invented interface: When Hyper-V use eVMCS, it
|
|
* just sets first u32 field of eVMCS to revision_id specified
|
|
* in MSR_IA32_VMX_BASIC. Instead of used eVMCS version number
|
|
* which is one of the supported versions specified in
|
|
* CPUID.0x4000000A.EAX[0:15].
|
|
*
|
|
* To overcome Hyper-V bug, we accept here either a supported
|
|
* eVMCS version or VMCS12 revision_id as valid values for first
|
|
* u32 field of eVMCS.
|
|
*/
|
|
if ((vmx->nested.hv_evmcs->revision_id != KVM_EVMCS_VERSION) &&
|
|
(vmx->nested.hv_evmcs->revision_id != VMCS12_REVISION)) {
|
|
nested_release_evmcs(vcpu);
|
|
return EVMPTRLD_VMFAIL;
|
|
}
|
|
|
|
vmx->nested.hv_evmcs_vmptr = evmcs_gpa;
|
|
|
|
evmcs_gpa_changed = true;
|
|
/*
|
|
* Unlike normal vmcs12, enlightened vmcs12 is not fully
|
|
* reloaded from guest's memory (read only fields, fields not
|
|
* present in struct hv_enlightened_vmcs, ...). Make sure there
|
|
* are no leftovers.
|
|
*/
|
|
if (from_launch) {
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
memset(vmcs12, 0, sizeof(*vmcs12));
|
|
vmcs12->hdr.revision_id = VMCS12_REVISION;
|
|
}
|
|
|
|
}
|
|
|
|
/*
|
|
* Clean fields data can't be used on VMLAUNCH and when we switch
|
|
* between different L2 guests as KVM keeps a single VMCS12 per L1.
|
|
*/
|
|
if (from_launch || evmcs_gpa_changed)
|
|
vmx->nested.hv_evmcs->hv_clean_fields &=
|
|
~HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
|
|
|
|
return EVMPTRLD_SUCCEEDED;
|
|
}
|
|
|
|
void nested_sync_vmcs12_to_shadow(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
copy_vmcs12_to_enlightened(vmx);
|
|
else
|
|
copy_vmcs12_to_shadow(vmx);
|
|
|
|
vmx->nested.need_vmcs12_to_shadow_sync = false;
|
|
}
|
|
|
|
static enum hrtimer_restart vmx_preemption_timer_fn(struct hrtimer *timer)
|
|
{
|
|
struct vcpu_vmx *vmx =
|
|
container_of(timer, struct vcpu_vmx, nested.preemption_timer);
|
|
|
|
vmx->nested.preemption_timer_expired = true;
|
|
kvm_make_request(KVM_REQ_EVENT, &vmx->vcpu);
|
|
kvm_vcpu_kick(&vmx->vcpu);
|
|
|
|
return HRTIMER_NORESTART;
|
|
}
|
|
|
|
static u64 vmx_calc_preemption_timer_value(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
|
u64 l1_scaled_tsc = kvm_read_l1_tsc(vcpu, rdtsc()) >>
|
|
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
|
|
|
|
if (!vmx->nested.has_preemption_timer_deadline) {
|
|
vmx->nested.preemption_timer_deadline =
|
|
vmcs12->vmx_preemption_timer_value + l1_scaled_tsc;
|
|
vmx->nested.has_preemption_timer_deadline = true;
|
|
}
|
|
return vmx->nested.preemption_timer_deadline - l1_scaled_tsc;
|
|
}
|
|
|
|
static void vmx_start_preemption_timer(struct kvm_vcpu *vcpu,
|
|
u64 preemption_timeout)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
/*
|
|
* A timer value of zero is architecturally guaranteed to cause
|
|
* a VMExit prior to executing any instructions in the guest.
|
|
*/
|
|
if (preemption_timeout == 0) {
|
|
vmx_preemption_timer_fn(&vmx->nested.preemption_timer);
|
|
return;
|
|
}
|
|
|
|
if (vcpu->arch.virtual_tsc_khz == 0)
|
|
return;
|
|
|
|
preemption_timeout <<= VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
|
|
preemption_timeout *= 1000000;
|
|
do_div(preemption_timeout, vcpu->arch.virtual_tsc_khz);
|
|
hrtimer_start(&vmx->nested.preemption_timer,
|
|
ktime_add_ns(ktime_get(), preemption_timeout),
|
|
HRTIMER_MODE_ABS_PINNED);
|
|
}
|
|
|
|
static u64 nested_vmx_calc_efer(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|
{
|
|
if (vmx->nested.nested_run_pending &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER))
|
|
return vmcs12->guest_ia32_efer;
|
|
else if (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE)
|
|
return vmx->vcpu.arch.efer | (EFER_LMA | EFER_LME);
|
|
else
|
|
return vmx->vcpu.arch.efer & ~(EFER_LMA | EFER_LME);
|
|
}
|
|
|
|
static void prepare_vmcs02_constant_state(struct vcpu_vmx *vmx)
|
|
{
|
|
/*
|
|
* If vmcs02 hasn't been initialized, set the constant vmcs02 state
|
|
* according to L0's settings (vmcs12 is irrelevant here). Host
|
|
* fields that come from L0 and are not constant, e.g. HOST_CR3,
|
|
* will be set as needed prior to VMLAUNCH/VMRESUME.
|
|
*/
|
|
if (vmx->nested.vmcs02_initialized)
|
|
return;
|
|
vmx->nested.vmcs02_initialized = true;
|
|
|
|
/*
|
|
* We don't care what the EPTP value is we just need to guarantee
|
|
* it's valid so we don't get a false positive when doing early
|
|
* consistency checks.
|
|
*/
|
|
if (enable_ept && nested_early_check)
|
|
vmcs_write64(EPT_POINTER,
|
|
construct_eptp(&vmx->vcpu, 0, PT64_ROOT_4LEVEL));
|
|
|
|
/* All VMFUNCs are currently emulated through L0 vmexits. */
|
|
if (cpu_has_vmx_vmfunc())
|
|
vmcs_write64(VM_FUNCTION_CONTROL, 0);
|
|
|
|
if (cpu_has_vmx_posted_intr())
|
|
vmcs_write16(POSTED_INTR_NV, POSTED_INTR_NESTED_VECTOR);
|
|
|
|
if (cpu_has_vmx_msr_bitmap())
|
|
vmcs_write64(MSR_BITMAP, __pa(vmx->nested.vmcs02.msr_bitmap));
|
|
|
|
/*
|
|
* PML is emulated for L2, but never enabled in hardware as the MMU
|
|
* handles A/D emulation. Disabling PML for L2 also avoids having to
|
|
* deal with filtering out L2 GPAs from the buffer.
|
|
*/
|
|
if (enable_pml) {
|
|
vmcs_write64(PML_ADDRESS, 0);
|
|
vmcs_write16(GUEST_PML_INDEX, -1);
|
|
}
|
|
|
|
if (cpu_has_vmx_encls_vmexit())
|
|
vmcs_write64(ENCLS_EXITING_BITMAP, -1ull);
|
|
|
|
/*
|
|
* Set the MSR load/store lists to match L0's settings. Only the
|
|
* addresses are constant (for vmcs02), the counts can change based
|
|
* on L2's behavior, e.g. switching to/from long mode.
|
|
*/
|
|
vmcs_write64(VM_EXIT_MSR_STORE_ADDR, __pa(vmx->msr_autostore.guest.val));
|
|
vmcs_write64(VM_EXIT_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.host.val));
|
|
vmcs_write64(VM_ENTRY_MSR_LOAD_ADDR, __pa(vmx->msr_autoload.guest.val));
|
|
|
|
vmx_set_constant_host_state(vmx);
|
|
}
|
|
|
|
static void prepare_vmcs02_early_rare(struct vcpu_vmx *vmx,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
prepare_vmcs02_constant_state(vmx);
|
|
|
|
vmcs_write64(VMCS_LINK_POINTER, -1ull);
|
|
|
|
if (enable_vpid) {
|
|
if (nested_cpu_has_vpid(vmcs12) && vmx->nested.vpid02)
|
|
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->nested.vpid02);
|
|
else
|
|
vmcs_write16(VIRTUAL_PROCESSOR_ID, vmx->vpid);
|
|
}
|
|
}
|
|
|
|
static void prepare_vmcs02_early(struct vcpu_vmx *vmx, struct loaded_vmcs *vmcs01,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
u32 exec_control;
|
|
u64 guest_efer = nested_vmx_calc_efer(vmx, vmcs12);
|
|
|
|
if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
prepare_vmcs02_early_rare(vmx, vmcs12);
|
|
|
|
/*
|
|
* PIN CONTROLS
|
|
*/
|
|
exec_control = __pin_controls_get(vmcs01);
|
|
exec_control |= (vmcs12->pin_based_vm_exec_control &
|
|
~PIN_BASED_VMX_PREEMPTION_TIMER);
|
|
|
|
/* Posted interrupts setting is only taken from vmcs12. */
|
|
vmx->nested.pi_pending = false;
|
|
if (nested_cpu_has_posted_intr(vmcs12))
|
|
vmx->nested.posted_intr_nv = vmcs12->posted_intr_nv;
|
|
else
|
|
exec_control &= ~PIN_BASED_POSTED_INTR;
|
|
pin_controls_set(vmx, exec_control);
|
|
|
|
/*
|
|
* EXEC CONTROLS
|
|
*/
|
|
exec_control = __exec_controls_get(vmcs01); /* L0's desires */
|
|
exec_control &= ~CPU_BASED_INTR_WINDOW_EXITING;
|
|
exec_control &= ~CPU_BASED_NMI_WINDOW_EXITING;
|
|
exec_control &= ~CPU_BASED_TPR_SHADOW;
|
|
exec_control |= vmcs12->cpu_based_vm_exec_control;
|
|
|
|
vmx->nested.l1_tpr_threshold = -1;
|
|
if (exec_control & CPU_BASED_TPR_SHADOW)
|
|
vmcs_write32(TPR_THRESHOLD, vmcs12->tpr_threshold);
|
|
#ifdef CONFIG_X86_64
|
|
else
|
|
exec_control |= CPU_BASED_CR8_LOAD_EXITING |
|
|
CPU_BASED_CR8_STORE_EXITING;
|
|
#endif
|
|
|
|
/*
|
|
* A vmexit (to either L1 hypervisor or L0 userspace) is always needed
|
|
* for I/O port accesses.
|
|
*/
|
|
exec_control |= CPU_BASED_UNCOND_IO_EXITING;
|
|
exec_control &= ~CPU_BASED_USE_IO_BITMAPS;
|
|
|
|
/*
|
|
* This bit will be computed in nested_get_vmcs12_pages, because
|
|
* we do not have access to L1's MSR bitmap yet. For now, keep
|
|
* the same bit as before, hoping to avoid multiple VMWRITEs that
|
|
* only set/clear this bit.
|
|
*/
|
|
exec_control &= ~CPU_BASED_USE_MSR_BITMAPS;
|
|
exec_control |= exec_controls_get(vmx) & CPU_BASED_USE_MSR_BITMAPS;
|
|
|
|
exec_controls_set(vmx, exec_control);
|
|
|
|
/*
|
|
* SECONDARY EXEC CONTROLS
|
|
*/
|
|
if (cpu_has_secondary_exec_ctrls()) {
|
|
exec_control = __secondary_exec_controls_get(vmcs01);
|
|
|
|
/* Take the following fields only from vmcs12 */
|
|
exec_control &= ~(SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES |
|
|
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
|
SECONDARY_EXEC_ENABLE_INVPCID |
|
|
SECONDARY_EXEC_ENABLE_RDTSCP |
|
|
SECONDARY_EXEC_XSAVES |
|
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE |
|
|
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
|
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
|
SECONDARY_EXEC_ENABLE_VMFUNC |
|
|
SECONDARY_EXEC_DESC);
|
|
|
|
if (nested_cpu_has(vmcs12,
|
|
CPU_BASED_ACTIVATE_SECONDARY_CONTROLS))
|
|
exec_control |= vmcs12->secondary_vm_exec_control;
|
|
|
|
/* PML is emulated and never enabled in hardware for L2. */
|
|
exec_control &= ~SECONDARY_EXEC_ENABLE_PML;
|
|
|
|
/* VMCS shadowing for L2 is emulated for now */
|
|
exec_control &= ~SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
|
/*
|
|
* Preset *DT exiting when emulating UMIP, so that vmx_set_cr4()
|
|
* will not have to rewrite the controls just for this bit.
|
|
*/
|
|
if (!boot_cpu_has(X86_FEATURE_UMIP) && vmx_umip_emulated() &&
|
|
(vmcs12->guest_cr4 & X86_CR4_UMIP))
|
|
exec_control |= SECONDARY_EXEC_DESC;
|
|
|
|
if (exec_control & SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY)
|
|
vmcs_write16(GUEST_INTR_STATUS,
|
|
vmcs12->guest_intr_status);
|
|
|
|
if (!nested_cpu_has2(vmcs12, SECONDARY_EXEC_UNRESTRICTED_GUEST))
|
|
exec_control &= ~SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
|
|
|
if (exec_control & SECONDARY_EXEC_ENCLS_EXITING)
|
|
vmx_write_encls_bitmap(&vmx->vcpu, vmcs12);
|
|
|
|
secondary_exec_controls_set(vmx, exec_control);
|
|
}
|
|
|
|
/*
|
|
* ENTRY CONTROLS
|
|
*
|
|
* vmcs12's VM_{ENTRY,EXIT}_LOAD_IA32_EFER and VM_ENTRY_IA32E_MODE
|
|
* are emulated by vmx_set_efer() in prepare_vmcs02(), but speculate
|
|
* on the related bits (if supported by the CPU) in the hope that
|
|
* we can avoid VMWrites during vmx_set_efer().
|
|
*
|
|
* Similarly, take vmcs01's PERF_GLOBAL_CTRL in the hope that if KVM is
|
|
* loading PERF_GLOBAL_CTRL via the VMCS for L1, then KVM will want to
|
|
* do the same for L2.
|
|
*/
|
|
exec_control = __vm_entry_controls_get(vmcs01);
|
|
exec_control |= (vmcs12->vm_entry_controls &
|
|
~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL);
|
|
exec_control &= ~(VM_ENTRY_IA32E_MODE | VM_ENTRY_LOAD_IA32_EFER);
|
|
if (cpu_has_load_ia32_efer()) {
|
|
if (guest_efer & EFER_LMA)
|
|
exec_control |= VM_ENTRY_IA32E_MODE;
|
|
if (guest_efer != host_efer)
|
|
exec_control |= VM_ENTRY_LOAD_IA32_EFER;
|
|
}
|
|
vm_entry_controls_set(vmx, exec_control);
|
|
|
|
/*
|
|
* EXIT CONTROLS
|
|
*
|
|
* L2->L1 exit controls are emulated - the hardware exit is to L0 so
|
|
* we should use its exit controls. Note that VM_EXIT_LOAD_IA32_EFER
|
|
* bits may be modified by vmx_set_efer() in prepare_vmcs02().
|
|
*/
|
|
exec_control = __vm_exit_controls_get(vmcs01);
|
|
if (cpu_has_load_ia32_efer() && guest_efer != host_efer)
|
|
exec_control |= VM_EXIT_LOAD_IA32_EFER;
|
|
else
|
|
exec_control &= ~VM_EXIT_LOAD_IA32_EFER;
|
|
vm_exit_controls_set(vmx, exec_control);
|
|
|
|
/*
|
|
* Interrupt/Exception Fields
|
|
*/
|
|
if (vmx->nested.nested_run_pending) {
|
|
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD,
|
|
vmcs12->vm_entry_intr_info_field);
|
|
vmcs_write32(VM_ENTRY_EXCEPTION_ERROR_CODE,
|
|
vmcs12->vm_entry_exception_error_code);
|
|
vmcs_write32(VM_ENTRY_INSTRUCTION_LEN,
|
|
vmcs12->vm_entry_instruction_len);
|
|
vmcs_write32(GUEST_INTERRUPTIBILITY_INFO,
|
|
vmcs12->guest_interruptibility_info);
|
|
vmx->loaded_vmcs->nmi_known_unmasked =
|
|
!(vmcs12->guest_interruptibility_info & GUEST_INTR_STATE_NMI);
|
|
} else {
|
|
vmcs_write32(VM_ENTRY_INTR_INFO_FIELD, 0);
|
|
}
|
|
}
|
|
|
|
static void prepare_vmcs02_rare(struct vcpu_vmx *vmx, struct vmcs12 *vmcs12)
|
|
{
|
|
struct hv_enlightened_vmcs *hv_evmcs = vmx->nested.hv_evmcs;
|
|
|
|
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
|
|
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP2)) {
|
|
vmcs_write16(GUEST_ES_SELECTOR, vmcs12->guest_es_selector);
|
|
vmcs_write16(GUEST_CS_SELECTOR, vmcs12->guest_cs_selector);
|
|
vmcs_write16(GUEST_SS_SELECTOR, vmcs12->guest_ss_selector);
|
|
vmcs_write16(GUEST_DS_SELECTOR, vmcs12->guest_ds_selector);
|
|
vmcs_write16(GUEST_FS_SELECTOR, vmcs12->guest_fs_selector);
|
|
vmcs_write16(GUEST_GS_SELECTOR, vmcs12->guest_gs_selector);
|
|
vmcs_write16(GUEST_LDTR_SELECTOR, vmcs12->guest_ldtr_selector);
|
|
vmcs_write16(GUEST_TR_SELECTOR, vmcs12->guest_tr_selector);
|
|
vmcs_write32(GUEST_ES_LIMIT, vmcs12->guest_es_limit);
|
|
vmcs_write32(GUEST_CS_LIMIT, vmcs12->guest_cs_limit);
|
|
vmcs_write32(GUEST_SS_LIMIT, vmcs12->guest_ss_limit);
|
|
vmcs_write32(GUEST_DS_LIMIT, vmcs12->guest_ds_limit);
|
|
vmcs_write32(GUEST_FS_LIMIT, vmcs12->guest_fs_limit);
|
|
vmcs_write32(GUEST_GS_LIMIT, vmcs12->guest_gs_limit);
|
|
vmcs_write32(GUEST_LDTR_LIMIT, vmcs12->guest_ldtr_limit);
|
|
vmcs_write32(GUEST_TR_LIMIT, vmcs12->guest_tr_limit);
|
|
vmcs_write32(GUEST_GDTR_LIMIT, vmcs12->guest_gdtr_limit);
|
|
vmcs_write32(GUEST_IDTR_LIMIT, vmcs12->guest_idtr_limit);
|
|
vmcs_write32(GUEST_CS_AR_BYTES, vmcs12->guest_cs_ar_bytes);
|
|
vmcs_write32(GUEST_SS_AR_BYTES, vmcs12->guest_ss_ar_bytes);
|
|
vmcs_write32(GUEST_ES_AR_BYTES, vmcs12->guest_es_ar_bytes);
|
|
vmcs_write32(GUEST_DS_AR_BYTES, vmcs12->guest_ds_ar_bytes);
|
|
vmcs_write32(GUEST_FS_AR_BYTES, vmcs12->guest_fs_ar_bytes);
|
|
vmcs_write32(GUEST_GS_AR_BYTES, vmcs12->guest_gs_ar_bytes);
|
|
vmcs_write32(GUEST_LDTR_AR_BYTES, vmcs12->guest_ldtr_ar_bytes);
|
|
vmcs_write32(GUEST_TR_AR_BYTES, vmcs12->guest_tr_ar_bytes);
|
|
vmcs_writel(GUEST_ES_BASE, vmcs12->guest_es_base);
|
|
vmcs_writel(GUEST_CS_BASE, vmcs12->guest_cs_base);
|
|
vmcs_writel(GUEST_SS_BASE, vmcs12->guest_ss_base);
|
|
vmcs_writel(GUEST_DS_BASE, vmcs12->guest_ds_base);
|
|
vmcs_writel(GUEST_FS_BASE, vmcs12->guest_fs_base);
|
|
vmcs_writel(GUEST_GS_BASE, vmcs12->guest_gs_base);
|
|
vmcs_writel(GUEST_LDTR_BASE, vmcs12->guest_ldtr_base);
|
|
vmcs_writel(GUEST_TR_BASE, vmcs12->guest_tr_base);
|
|
vmcs_writel(GUEST_GDTR_BASE, vmcs12->guest_gdtr_base);
|
|
vmcs_writel(GUEST_IDTR_BASE, vmcs12->guest_idtr_base);
|
|
|
|
vmx->segment_cache.bitmask = 0;
|
|
}
|
|
|
|
if (!hv_evmcs || !(hv_evmcs->hv_clean_fields &
|
|
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1)) {
|
|
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->guest_sysenter_cs);
|
|
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
|
|
vmcs12->guest_pending_dbg_exceptions);
|
|
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->guest_sysenter_esp);
|
|
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->guest_sysenter_eip);
|
|
|
|
/*
|
|
* L1 may access the L2's PDPTR, so save them to construct
|
|
* vmcs12
|
|
*/
|
|
if (enable_ept) {
|
|
vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
|
|
vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
|
|
vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
|
|
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
|
|
}
|
|
|
|
if (kvm_mpx_supported() && vmx->nested.nested_run_pending &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS))
|
|
vmcs_write64(GUEST_BNDCFGS, vmcs12->guest_bndcfgs);
|
|
}
|
|
|
|
if (nested_cpu_has_xsaves(vmcs12))
|
|
vmcs_write64(XSS_EXIT_BITMAP, vmcs12->xss_exit_bitmap);
|
|
|
|
/*
|
|
* Whether page-faults are trapped is determined by a combination of
|
|
* 3 settings: PFEC_MASK, PFEC_MATCH and EXCEPTION_BITMAP.PF. If L0
|
|
* doesn't care about page faults then we should set all of these to
|
|
* L1's desires. However, if L0 does care about (some) page faults, it
|
|
* is not easy (if at all possible?) to merge L0 and L1's desires, we
|
|
* simply ask to exit on each and every L2 page fault. This is done by
|
|
* setting MASK=MATCH=0 and (see below) EB.PF=1.
|
|
* Note that below we don't need special code to set EB.PF beyond the
|
|
* "or"ing of the EB of vmcs01 and vmcs12, because when enable_ept,
|
|
* vmcs01's EB.PF is 0 so the "or" will take vmcs12's value, and when
|
|
* !enable_ept, EB.PF is 1, so the "or" will always be 1.
|
|
*/
|
|
if (vmx_need_pf_intercept(&vmx->vcpu)) {
|
|
/*
|
|
* TODO: if both L0 and L1 need the same MASK and MATCH,
|
|
* go ahead and use it?
|
|
*/
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, 0);
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, 0);
|
|
} else {
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MASK, vmcs12->page_fault_error_code_mask);
|
|
vmcs_write32(PAGE_FAULT_ERROR_CODE_MATCH, vmcs12->page_fault_error_code_match);
|
|
}
|
|
|
|
if (cpu_has_vmx_apicv()) {
|
|
vmcs_write64(EOI_EXIT_BITMAP0, vmcs12->eoi_exit_bitmap0);
|
|
vmcs_write64(EOI_EXIT_BITMAP1, vmcs12->eoi_exit_bitmap1);
|
|
vmcs_write64(EOI_EXIT_BITMAP2, vmcs12->eoi_exit_bitmap2);
|
|
vmcs_write64(EOI_EXIT_BITMAP3, vmcs12->eoi_exit_bitmap3);
|
|
}
|
|
|
|
/*
|
|
* Make sure the msr_autostore list is up to date before we set the
|
|
* count in the vmcs02.
|
|
*/
|
|
prepare_vmx_msr_autostore_list(&vmx->vcpu, MSR_IA32_TSC);
|
|
|
|
vmcs_write32(VM_EXIT_MSR_STORE_COUNT, vmx->msr_autostore.guest.nr);
|
|
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
|
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
|
|
|
set_cr4_guest_host_mask(vmx);
|
|
}
|
|
|
|
/*
|
|
* prepare_vmcs02 is called when the L1 guest hypervisor runs its nested
|
|
* L2 guest. L1 has a vmcs for L2 (vmcs12), and this function "merges" it
|
|
* with L0's requirements for its guest (a.k.a. vmcs01), so we can run the L2
|
|
* guest in a way that will both be appropriate to L1's requests, and our
|
|
* needs. In addition to modifying the active vmcs (which is vmcs02), this
|
|
* function also has additional necessary side-effects, like setting various
|
|
* vcpu->arch fields.
|
|
* Returns 0 on success, 1 on failure. Invalid state exit qualification code
|
|
* is assigned to entry_failure_code on failure.
|
|
*/
|
|
static int prepare_vmcs02(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
bool from_vmentry,
|
|
enum vm_entry_failure_code *entry_failure_code)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
bool load_guest_pdptrs_vmcs12 = false;
|
|
|
|
if (vmx->nested.dirty_vmcs12 || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
|
|
prepare_vmcs02_rare(vmx, vmcs12);
|
|
vmx->nested.dirty_vmcs12 = false;
|
|
|
|
load_guest_pdptrs_vmcs12 = !evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) ||
|
|
!(vmx->nested.hv_evmcs->hv_clean_fields &
|
|
HV_VMX_ENLIGHTENED_CLEAN_FIELD_GUEST_GRP1);
|
|
}
|
|
|
|
if (vmx->nested.nested_run_pending &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS)) {
|
|
kvm_set_dr(vcpu, 7, vmcs12->guest_dr7);
|
|
vmcs_write64(GUEST_IA32_DEBUGCTL, vmcs12->guest_ia32_debugctl);
|
|
} else {
|
|
kvm_set_dr(vcpu, 7, vcpu->arch.dr7);
|
|
vmcs_write64(GUEST_IA32_DEBUGCTL, vmx->nested.vmcs01_debugctl);
|
|
}
|
|
if (kvm_mpx_supported() && (!vmx->nested.nested_run_pending ||
|
|
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
|
|
vmcs_write64(GUEST_BNDCFGS, vmx->nested.vmcs01_guest_bndcfgs);
|
|
vmx_set_rflags(vcpu, vmcs12->guest_rflags);
|
|
|
|
/* EXCEPTION_BITMAP and CR0_GUEST_HOST_MASK should basically be the
|
|
* bitwise-or of what L1 wants to trap for L2, and what we want to
|
|
* trap. Note that CR0.TS also needs updating - we do this later.
|
|
*/
|
|
vmx_update_exception_bitmap(vcpu);
|
|
vcpu->arch.cr0_guest_owned_bits &= ~vmcs12->cr0_guest_host_mask;
|
|
vmcs_writel(CR0_GUEST_HOST_MASK, ~vcpu->arch.cr0_guest_owned_bits);
|
|
|
|
if (vmx->nested.nested_run_pending &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT)) {
|
|
vmcs_write64(GUEST_IA32_PAT, vmcs12->guest_ia32_pat);
|
|
vcpu->arch.pat = vmcs12->guest_ia32_pat;
|
|
} else if (vmcs_config.vmentry_ctrl & VM_ENTRY_LOAD_IA32_PAT) {
|
|
vmcs_write64(GUEST_IA32_PAT, vmx->vcpu.arch.pat);
|
|
}
|
|
|
|
vcpu->arch.tsc_offset = kvm_calc_nested_tsc_offset(
|
|
vcpu->arch.l1_tsc_offset,
|
|
vmx_get_l2_tsc_offset(vcpu),
|
|
vmx_get_l2_tsc_multiplier(vcpu));
|
|
|
|
vcpu->arch.tsc_scaling_ratio = kvm_calc_nested_tsc_multiplier(
|
|
vcpu->arch.l1_tsc_scaling_ratio,
|
|
vmx_get_l2_tsc_multiplier(vcpu));
|
|
|
|
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
|
|
if (kvm_has_tsc_control)
|
|
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
|
|
|
|
nested_vmx_transition_tlb_flush(vcpu, vmcs12, true);
|
|
|
|
if (nested_cpu_has_ept(vmcs12))
|
|
nested_ept_init_mmu_context(vcpu);
|
|
|
|
/*
|
|
* This sets GUEST_CR0 to vmcs12->guest_cr0, possibly modifying those
|
|
* bits which we consider mandatory enabled.
|
|
* The CR0_READ_SHADOW is what L2 should have expected to read given
|
|
* the specifications by L1; It's not enough to take
|
|
* vmcs12->cr0_read_shadow because on our cr0_guest_host_mask we we
|
|
* have more bits than L1 expected.
|
|
*/
|
|
vmx_set_cr0(vcpu, vmcs12->guest_cr0);
|
|
vmcs_writel(CR0_READ_SHADOW, nested_read_cr0(vmcs12));
|
|
|
|
vmx_set_cr4(vcpu, vmcs12->guest_cr4);
|
|
vmcs_writel(CR4_READ_SHADOW, nested_read_cr4(vmcs12));
|
|
|
|
vcpu->arch.efer = nested_vmx_calc_efer(vmx, vmcs12);
|
|
/* Note: may modify VM_ENTRY/EXIT_CONTROLS and GUEST/HOST_IA32_EFER */
|
|
vmx_set_efer(vcpu, vcpu->arch.efer);
|
|
|
|
/*
|
|
* Guest state is invalid and unrestricted guest is disabled,
|
|
* which means L1 attempted VMEntry to L2 with invalid state.
|
|
* Fail the VMEntry.
|
|
*
|
|
* However when force loading the guest state (SMM exit or
|
|
* loading nested state after migration, it is possible to
|
|
* have invalid guest state now, which will be later fixed by
|
|
* restoring L2 register state
|
|
*/
|
|
if (CC(from_vmentry && !vmx_guest_state_valid(vcpu))) {
|
|
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
|
return -EINVAL;
|
|
}
|
|
|
|
/* Shadow page tables on either EPT or shadow page tables. */
|
|
if (nested_vmx_load_cr3(vcpu, vmcs12->guest_cr3, nested_cpu_has_ept(vmcs12),
|
|
from_vmentry, entry_failure_code))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* Immediately write vmcs02.GUEST_CR3. It will be propagated to vmcs12
|
|
* on nested VM-Exit, which can occur without actually running L2 and
|
|
* thus without hitting vmx_load_mmu_pgd(), e.g. if L1 is entering L2 with
|
|
* vmcs12.GUEST_ACTIVITYSTATE=HLT, in which case KVM will intercept the
|
|
* transition to HLT instead of running L2.
|
|
*/
|
|
if (enable_ept)
|
|
vmcs_writel(GUEST_CR3, vmcs12->guest_cr3);
|
|
|
|
/* Late preparation of GUEST_PDPTRs now that EFER and CRs are set. */
|
|
if (load_guest_pdptrs_vmcs12 && nested_cpu_has_ept(vmcs12) &&
|
|
is_pae_paging(vcpu)) {
|
|
vmcs_write64(GUEST_PDPTR0, vmcs12->guest_pdptr0);
|
|
vmcs_write64(GUEST_PDPTR1, vmcs12->guest_pdptr1);
|
|
vmcs_write64(GUEST_PDPTR2, vmcs12->guest_pdptr2);
|
|
vmcs_write64(GUEST_PDPTR3, vmcs12->guest_pdptr3);
|
|
}
|
|
|
|
if (!enable_ept)
|
|
vcpu->arch.walk_mmu->inject_page_fault = vmx_inject_page_fault_nested;
|
|
|
|
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
|
|
intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)) &&
|
|
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
|
|
vmcs12->guest_ia32_perf_global_ctrl))) {
|
|
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
|
return -EINVAL;
|
|
}
|
|
|
|
kvm_rsp_write(vcpu, vmcs12->guest_rsp);
|
|
kvm_rip_write(vcpu, vmcs12->guest_rip);
|
|
|
|
/*
|
|
* It was observed that genuine Hyper-V running in L1 doesn't reset
|
|
* 'hv_clean_fields' by itself, it only sets the corresponding dirty
|
|
* bits when it changes a field in eVMCS. Mark all fields as clean
|
|
* here.
|
|
*/
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
vmx->nested.hv_evmcs->hv_clean_fields |=
|
|
HV_VMX_ENLIGHTENED_CLEAN_FIELD_ALL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_nmi_controls(struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(!nested_cpu_has_nmi_exiting(vmcs12) &&
|
|
nested_cpu_has_virtual_nmis(vmcs12)))
|
|
return -EINVAL;
|
|
|
|
if (CC(!nested_cpu_has_virtual_nmis(vmcs12) &&
|
|
nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * Validate an EPT pointer against the EPT capabilities recorded in
 * vmx->nested.msrs.ept_caps.  Returns true only if the memory type, the
 * page-walk length, the reserved bits and the A/D-enable bit are all
 * acceptable.
 */
static bool nested_vmx_check_eptp(struct kvm_vcpu *vcpu, u64 new_eptp)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	u64 mem_type = new_eptp & VMX_EPTP_MT_MASK;
	u64 walk_len = new_eptp & VMX_EPTP_PWL_MASK;

	/* Memory type: only UC and WB are accepted, each if supported. */
	if (mem_type == VMX_EPTP_MT_UC) {
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_UC_BIT)))
			return false;
	} else if (mem_type == VMX_EPTP_MT_WB) {
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPTP_WB_BIT)))
			return false;
	} else {
		return false;
	}

	/* Page-walk length: only 5- and 4-level, each if supported. */
	if (walk_len == VMX_EPTP_PWL_5) {
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_5_BIT)))
			return false;
	} else if (walk_len == VMX_EPTP_PWL_4) {
		if (CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_PAGE_WALK_4_BIT)))
			return false;
	} else {
		return false;
	}

	/* Reserved bits should not be set */
	if (CC(kvm_vcpu_is_illegal_gpa(vcpu, new_eptp) || ((new_eptp >> 7) & 0x1f)))
		return false;

	/* AD, if set, should be supported */
	if ((new_eptp & VMX_EPTP_AD_ENABLE_BIT) &&
	    CC(!(vmx->nested.msrs.ept_caps & VMX_EPT_AD_BIT)))
		return false;

	return true;
}
|
|
|
|
/*
|
|
* Checks related to VM-Execution Control Fields
|
|
*/
|
|
static int nested_check_vm_execution_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (CC(!vmx_control_verify(vmcs12->pin_based_vm_exec_control,
|
|
vmx->nested.msrs.pinbased_ctls_low,
|
|
vmx->nested.msrs.pinbased_ctls_high)) ||
|
|
CC(!vmx_control_verify(vmcs12->cpu_based_vm_exec_control,
|
|
vmx->nested.msrs.procbased_ctls_low,
|
|
vmx->nested.msrs.procbased_ctls_high)))
|
|
return -EINVAL;
|
|
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_ACTIVATE_SECONDARY_CONTROLS) &&
|
|
CC(!vmx_control_verify(vmcs12->secondary_vm_exec_control,
|
|
vmx->nested.msrs.secondary_ctls_low,
|
|
vmx->nested.msrs.secondary_ctls_high)))
|
|
return -EINVAL;
|
|
|
|
if (CC(vmcs12->cr3_target_count > nested_cpu_vmx_misc_cr3_count(vcpu)) ||
|
|
nested_vmx_check_io_bitmap_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_msr_bitmap_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_tpr_shadow_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_apic_access_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_apicv_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_nmi_controls(vmcs12) ||
|
|
nested_vmx_check_pml_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_unrestricted_guest_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_mode_based_ept_exec_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_shadow_vmcs_controls(vcpu, vmcs12) ||
|
|
CC(nested_cpu_has_vpid(vmcs12) && !vmcs12->virtual_processor_id))
|
|
return -EINVAL;
|
|
|
|
if (!nested_cpu_has_preemption_timer(vmcs12) &&
|
|
nested_cpu_has_save_preemption_timer(vmcs12))
|
|
return -EINVAL;
|
|
|
|
if (nested_cpu_has_ept(vmcs12) &&
|
|
CC(!nested_vmx_check_eptp(vcpu, vmcs12->ept_pointer)))
|
|
return -EINVAL;
|
|
|
|
if (nested_cpu_has_vmfunc(vmcs12)) {
|
|
if (CC(vmcs12->vm_function_control &
|
|
~vmx->nested.msrs.vmfunc_controls))
|
|
return -EINVAL;
|
|
|
|
if (nested_cpu_has_eptp_switching(vmcs12)) {
|
|
if (CC(!nested_cpu_has_ept(vmcs12)) ||
|
|
CC(!page_address_valid(vcpu, vmcs12->eptp_list_address)))
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Checks related to VM-Exit Control Fields
|
|
*/
|
|
static int nested_check_vm_exit_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (CC(!vmx_control_verify(vmcs12->vm_exit_controls,
|
|
vmx->nested.msrs.exit_ctls_low,
|
|
vmx->nested.msrs.exit_ctls_high)) ||
|
|
CC(nested_vmx_check_exit_msr_switch_controls(vcpu, vmcs12)))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Checks related to VM-Entry Control Fields
|
|
*/
|
|
static int nested_check_vm_entry_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (CC(!vmx_control_verify(vmcs12->vm_entry_controls,
|
|
vmx->nested.msrs.entry_ctls_low,
|
|
vmx->nested.msrs.entry_ctls_high)))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* From the Intel SDM, volume 3:
|
|
* Fields relevant to VM-entry event injection must be set properly.
|
|
* These fields are the VM-entry interruption-information field, the
|
|
* VM-entry exception error code, and the VM-entry instruction length.
|
|
*/
|
|
if (vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) {
|
|
u32 intr_info = vmcs12->vm_entry_intr_info_field;
|
|
u8 vector = intr_info & INTR_INFO_VECTOR_MASK;
|
|
u32 intr_type = intr_info & INTR_INFO_INTR_TYPE_MASK;
|
|
bool has_error_code = intr_info & INTR_INFO_DELIVER_CODE_MASK;
|
|
bool should_have_error_code;
|
|
bool urg = nested_cpu_has2(vmcs12,
|
|
SECONDARY_EXEC_UNRESTRICTED_GUEST);
|
|
bool prot_mode = !urg || vmcs12->guest_cr0 & X86_CR0_PE;
|
|
|
|
/* VM-entry interruption-info field: interruption type */
|
|
if (CC(intr_type == INTR_TYPE_RESERVED) ||
|
|
CC(intr_type == INTR_TYPE_OTHER_EVENT &&
|
|
!nested_cpu_supports_monitor_trap_flag(vcpu)))
|
|
return -EINVAL;
|
|
|
|
/* VM-entry interruption-info field: vector */
|
|
if (CC(intr_type == INTR_TYPE_NMI_INTR && vector != NMI_VECTOR) ||
|
|
CC(intr_type == INTR_TYPE_HARD_EXCEPTION && vector > 31) ||
|
|
CC(intr_type == INTR_TYPE_OTHER_EVENT && vector != 0))
|
|
return -EINVAL;
|
|
|
|
/* VM-entry interruption-info field: deliver error code */
|
|
should_have_error_code =
|
|
intr_type == INTR_TYPE_HARD_EXCEPTION && prot_mode &&
|
|
x86_exception_has_error_code(vector);
|
|
if (CC(has_error_code != should_have_error_code))
|
|
return -EINVAL;
|
|
|
|
/* VM-entry exception error code */
|
|
if (CC(has_error_code &&
|
|
vmcs12->vm_entry_exception_error_code & GENMASK(31, 16)))
|
|
return -EINVAL;
|
|
|
|
/* VM-entry interruption-info field: reserved bits */
|
|
if (CC(intr_info & INTR_INFO_RESVD_BITS_MASK))
|
|
return -EINVAL;
|
|
|
|
/* VM-entry instruction length */
|
|
switch (intr_type) {
|
|
case INTR_TYPE_SOFT_EXCEPTION:
|
|
case INTR_TYPE_SOFT_INTR:
|
|
case INTR_TYPE_PRIV_SW_EXCEPTION:
|
|
if (CC(vmcs12->vm_entry_instruction_len > 15) ||
|
|
CC(vmcs12->vm_entry_instruction_len == 0 &&
|
|
CC(!nested_cpu_has_zero_length_injection(vcpu))))
|
|
return -EINVAL;
|
|
}
|
|
}
|
|
|
|
if (nested_vmx_check_entry_msr_switch_controls(vcpu, vmcs12))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_controls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
if (nested_check_vm_execution_controls(vcpu, vmcs12) ||
|
|
nested_check_vm_exit_controls(vcpu, vmcs12) ||
|
|
nested_check_vm_entry_controls(vcpu, vmcs12))
|
|
return -EINVAL;
|
|
|
|
if (to_vmx(vcpu)->nested.enlightened_vmcs_enabled)
|
|
return nested_evmcs_check_controls(vmcs12);
|
|
|
|
return 0;
|
|
}
|
|
|
|
/*
 * On 64-bit builds, require vmcs12's "host address-space size" VM-exit
 * control to agree with EFER.LMA in the vCPU's current EFER; returns
 * -EINVAL on a mismatch.  Always returns 0 on other builds.
 */
static int nested_vmx_check_address_space_size(struct kvm_vcpu *vcpu,
					       struct vmcs12 *vmcs12)
{
	int ret = 0;

#ifdef CONFIG_X86_64
	if (CC(!!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE) !=
		!!(vcpu->arch.efer & EFER_LMA)))
		ret = -EINVAL;
#endif
	return ret;
}
|
|
|
|
static int nested_vmx_check_host_state(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
bool ia32e;
|
|
|
|
if (CC(!nested_host_cr0_valid(vcpu, vmcs12->host_cr0)) ||
|
|
CC(!nested_host_cr4_valid(vcpu, vmcs12->host_cr4)) ||
|
|
CC(kvm_vcpu_is_illegal_gpa(vcpu, vmcs12->host_cr3)))
|
|
return -EINVAL;
|
|
|
|
if (CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_esp, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_ia32_sysenter_eip, vcpu)))
|
|
return -EINVAL;
|
|
|
|
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) &&
|
|
CC(!kvm_pat_valid(vmcs12->host_ia32_pat)))
|
|
return -EINVAL;
|
|
|
|
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
|
|
CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
|
|
vmcs12->host_ia32_perf_global_ctrl)))
|
|
return -EINVAL;
|
|
|
|
#ifdef CONFIG_X86_64
|
|
ia32e = !!(vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE);
|
|
#else
|
|
ia32e = false;
|
|
#endif
|
|
|
|
if (ia32e) {
|
|
if (CC(!(vmcs12->host_cr4 & X86_CR4_PAE)))
|
|
return -EINVAL;
|
|
} else {
|
|
if (CC(vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) ||
|
|
CC(vmcs12->host_cr4 & X86_CR4_PCIDE) ||
|
|
CC((vmcs12->host_rip) >> 32))
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (CC(vmcs12->host_cs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_ss_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_ds_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_es_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_fs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_gs_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_tr_selector & (SEGMENT_RPL_MASK | SEGMENT_TI_MASK)) ||
|
|
CC(vmcs12->host_cs_selector == 0) ||
|
|
CC(vmcs12->host_tr_selector == 0) ||
|
|
CC(vmcs12->host_ss_selector == 0 && !ia32e))
|
|
return -EINVAL;
|
|
|
|
if (CC(is_noncanonical_address(vmcs12->host_fs_base, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_gs_base, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_gdtr_base, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_idtr_base, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_tr_base, vcpu)) ||
|
|
CC(is_noncanonical_address(vmcs12->host_rip, vcpu)))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If the load IA32_EFER VM-exit control is 1, bits reserved in the
|
|
* IA32_EFER MSR must be 0 in the field for that register. In addition,
|
|
* the values of the LMA and LME bits in the field must each be that of
|
|
* the host address-space size VM-exit control.
|
|
*/
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER) {
|
|
if (CC(!kvm_valid_efer(vcpu, vmcs12->host_ia32_efer)) ||
|
|
CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LMA)) ||
|
|
CC(ia32e != !!(vmcs12->host_ia32_efer & EFER_LME)))
|
|
return -EINVAL;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_vmcs_link_ptr(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
int r = 0;
|
|
struct vmcs12 *shadow;
|
|
struct kvm_host_map map;
|
|
|
|
if (vmcs12->vmcs_link_pointer == -1ull)
|
|
return 0;
|
|
|
|
if (CC(!page_address_valid(vcpu, vmcs12->vmcs_link_pointer)))
|
|
return -EINVAL;
|
|
|
|
if (CC(kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->vmcs_link_pointer), &map)))
|
|
return -EINVAL;
|
|
|
|
shadow = map.hva;
|
|
|
|
if (CC(shadow->hdr.revision_id != VMCS12_REVISION) ||
|
|
CC(shadow->hdr.shadow_vmcs != nested_cpu_has_shadow_vmcs(vmcs12)))
|
|
r = -EINVAL;
|
|
|
|
kvm_vcpu_unmap(vcpu, &map, false);
|
|
return r;
|
|
}
|
|
|
|
/*
|
|
* Checks related to Guest Non-register State
|
|
*/
|
|
static int nested_check_guest_non_reg_state(struct vmcs12 *vmcs12)
|
|
{
|
|
if (CC(vmcs12->guest_activity_state != GUEST_ACTIVITY_ACTIVE &&
|
|
vmcs12->guest_activity_state != GUEST_ACTIVITY_HLT &&
|
|
vmcs12->guest_activity_state != GUEST_ACTIVITY_WAIT_SIPI))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_guest_state(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12,
|
|
enum vm_entry_failure_code *entry_failure_code)
|
|
{
|
|
bool ia32e;
|
|
|
|
*entry_failure_code = ENTRY_FAIL_DEFAULT;
|
|
|
|
if (CC(!nested_guest_cr0_valid(vcpu, vmcs12->guest_cr0)) ||
|
|
CC(!nested_guest_cr4_valid(vcpu, vmcs12->guest_cr4)))
|
|
return -EINVAL;
|
|
|
|
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) &&
|
|
CC(!kvm_dr7_valid(vmcs12->guest_dr7)))
|
|
return -EINVAL;
|
|
|
|
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PAT) &&
|
|
CC(!kvm_pat_valid(vmcs12->guest_ia32_pat)))
|
|
return -EINVAL;
|
|
|
|
if (nested_vmx_check_vmcs_link_ptr(vcpu, vmcs12)) {
|
|
*entry_failure_code = ENTRY_FAIL_VMCS_LINK_PTR;
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL) &&
|
|
CC(!kvm_valid_perf_global_ctrl(vcpu_to_pmu(vcpu),
|
|
vmcs12->guest_ia32_perf_global_ctrl)))
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* If the load IA32_EFER VM-entry control is 1, the following checks
|
|
* are performed on the field for the IA32_EFER MSR:
|
|
* - Bits reserved in the IA32_EFER MSR must be 0.
|
|
* - Bit 10 (corresponding to IA32_EFER.LMA) must equal the value of
|
|
* the IA-32e mode guest VM-exit control. It must also be identical
|
|
* to bit 8 (LME) if bit 31 in the CR0 field (corresponding to
|
|
* CR0.PG) is 1.
|
|
*/
|
|
if (to_vmx(vcpu)->nested.nested_run_pending &&
|
|
(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_IA32_EFER)) {
|
|
ia32e = (vmcs12->vm_entry_controls & VM_ENTRY_IA32E_MODE) != 0;
|
|
if (CC(!kvm_valid_efer(vcpu, vmcs12->guest_ia32_efer)) ||
|
|
CC(ia32e != !!(vmcs12->guest_ia32_efer & EFER_LMA)) ||
|
|
CC(((vmcs12->guest_cr0 & X86_CR0_PG) &&
|
|
ia32e != !!(vmcs12->guest_ia32_efer & EFER_LME))))
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS) &&
|
|
(CC(is_noncanonical_address(vmcs12->guest_bndcfgs & PAGE_MASK, vcpu)) ||
|
|
CC((vmcs12->guest_bndcfgs & MSR_IA32_BNDCFGS_RSVD))))
|
|
return -EINVAL;
|
|
|
|
if (nested_check_guest_non_reg_state(vmcs12))
|
|
return -EINVAL;
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int nested_vmx_check_vmentry_hw(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
unsigned long cr3, cr4;
|
|
bool vm_fail;
|
|
|
|
if (!nested_early_check)
|
|
return 0;
|
|
|
|
if (vmx->msr_autoload.host.nr)
|
|
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, 0);
|
|
if (vmx->msr_autoload.guest.nr)
|
|
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, 0);
|
|
|
|
preempt_disable();
|
|
|
|
vmx_prepare_switch_to_guest(vcpu);
|
|
|
|
/*
|
|
* Induce a consistency check VMExit by clearing bit 1 in GUEST_RFLAGS,
|
|
* which is reserved to '1' by hardware. GUEST_RFLAGS is guaranteed to
|
|
* be written (by prepare_vmcs02()) before the "real" VMEnter, i.e.
|
|
* there is no need to preserve other bits or save/restore the field.
|
|
*/
|
|
vmcs_writel(GUEST_RFLAGS, 0);
|
|
|
|
cr3 = __get_current_cr3_fast();
|
|
if (unlikely(cr3 != vmx->loaded_vmcs->host_state.cr3)) {
|
|
vmcs_writel(HOST_CR3, cr3);
|
|
vmx->loaded_vmcs->host_state.cr3 = cr3;
|
|
}
|
|
|
|
cr4 = cr4_read_shadow();
|
|
if (unlikely(cr4 != vmx->loaded_vmcs->host_state.cr4)) {
|
|
vmcs_writel(HOST_CR4, cr4);
|
|
vmx->loaded_vmcs->host_state.cr4 = cr4;
|
|
}
|
|
|
|
vm_fail = __vmx_vcpu_run(vmx, (unsigned long *)&vcpu->arch.regs,
|
|
__vmx_vcpu_run_flags(vmx));
|
|
|
|
if (vmx->msr_autoload.host.nr)
|
|
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
|
if (vmx->msr_autoload.guest.nr)
|
|
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
|
|
|
if (vm_fail) {
|
|
u32 error = vmcs_read32(VM_INSTRUCTION_ERROR);
|
|
|
|
preempt_enable();
|
|
|
|
trace_kvm_nested_vmenter_failed(
|
|
"early hardware check VM-instruction error: ", error);
|
|
WARN_ON_ONCE(error != VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* VMExit clears RFLAGS.IF and DR7, even on a consistency check.
|
|
*/
|
|
if (hw_breakpoint_active())
|
|
set_debugreg(__this_cpu_read(cpu_dr7), 7);
|
|
local_irq_enable();
|
|
preempt_enable();
|
|
|
|
/*
|
|
* A non-failing VMEntry means we somehow entered guest mode with
|
|
* an illegal RIP, and that's just the tip of the iceberg. There
|
|
* is no telling what memory has been modified or what state has
|
|
* been exposed to unknown code. Hitting this all but guarantees
|
|
* a (very critical) hardware issue.
|
|
*/
|
|
WARN_ON(!(vmcs_read32(VM_EXIT_REASON) &
|
|
VMX_EXIT_REASONS_FAILED_VMENTRY));
|
|
|
|
return 0;
|
|
}
|
|
|
|
static bool nested_get_evmcs_page(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
/*
|
|
* hv_evmcs may end up being not mapped after migration (when
|
|
* L2 was running), map it here to make sure vmcs12 changes are
|
|
* properly reflected.
|
|
*/
|
|
if (vmx->nested.enlightened_vmcs_enabled &&
|
|
vmx->nested.hv_evmcs_vmptr == EVMPTR_MAP_PENDING) {
|
|
enum nested_evmptrld_status evmptrld_status =
|
|
nested_vmx_handle_enlightened_vmptrld(vcpu, false);
|
|
|
|
if (evmptrld_status == EVMPTRLD_VMFAIL ||
|
|
evmptrld_status == EVMPTRLD_ERROR)
|
|
return false;
|
|
|
|
/*
|
|
* Post migration VMCS12 always provides the most actual
|
|
* information, copy it to eVMCS upon entry.
|
|
*/
|
|
vmx->nested.need_vmcs12_to_shadow_sync = true;
|
|
}
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool nested_get_vmcs12_pages(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct kvm_host_map *map;
|
|
struct page *page;
|
|
u64 hpa;
|
|
|
|
if (!vcpu->arch.pdptrs_from_userspace &&
|
|
!nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
|
|
/*
|
|
* Reload the guest's PDPTRs since after a migration
|
|
* the guest CR3 might be restored prior to setting the nested
|
|
* state which can lead to a load of wrong PDPTRs.
|
|
*/
|
|
if (CC(!load_pdptrs(vcpu, vcpu->arch.walk_mmu, vcpu->arch.cr3)))
|
|
return false;
|
|
}
|
|
|
|
|
|
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
|
|
/*
|
|
* Translate L1 physical address to host physical
|
|
* address for vmcs02. Keep the page pinned, so this
|
|
* physical address remains valid. We keep a reference
|
|
* to it so we can release it later.
|
|
*/
|
|
if (vmx->nested.apic_access_page) { /* shouldn't happen */
|
|
kvm_release_page_clean(vmx->nested.apic_access_page);
|
|
vmx->nested.apic_access_page = NULL;
|
|
}
|
|
page = kvm_vcpu_gpa_to_page(vcpu, vmcs12->apic_access_addr);
|
|
if (!is_error_page(page)) {
|
|
vmx->nested.apic_access_page = page;
|
|
hpa = page_to_phys(vmx->nested.apic_access_page);
|
|
vmcs_write64(APIC_ACCESS_ADDR, hpa);
|
|
} else {
|
|
pr_debug_ratelimited("%s: no backing 'struct page' for APIC-access address in vmcs12\n",
|
|
__func__);
|
|
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
vcpu->run->internal.suberror =
|
|
KVM_INTERNAL_ERROR_EMULATION;
|
|
vcpu->run->internal.ndata = 0;
|
|
return false;
|
|
}
|
|
}
|
|
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
|
|
map = &vmx->nested.virtual_apic_map;
|
|
|
|
if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->virtual_apic_page_addr), map)) {
|
|
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, pfn_to_hpa(map->pfn));
|
|
} else if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING) &&
|
|
nested_cpu_has(vmcs12, CPU_BASED_CR8_STORE_EXITING) &&
|
|
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES)) {
|
|
/*
|
|
* The processor will never use the TPR shadow, simply
|
|
* clear the bit from the execution control. Such a
|
|
* configuration is useless, but it happens in tests.
|
|
* For any other configuration, failing the vm entry is
|
|
* _not_ what the processor does but it's basically the
|
|
* only possibility we have.
|
|
*/
|
|
exec_controls_clearbit(vmx, CPU_BASED_TPR_SHADOW);
|
|
} else {
|
|
/*
|
|
* Write an illegal value to VIRTUAL_APIC_PAGE_ADDR to
|
|
* force VM-Entry to fail.
|
|
*/
|
|
vmcs_write64(VIRTUAL_APIC_PAGE_ADDR, -1ull);
|
|
}
|
|
}
|
|
|
|
if (nested_cpu_has_posted_intr(vmcs12)) {
|
|
map = &vmx->nested.pi_desc_map;
|
|
|
|
if (!kvm_vcpu_map(vcpu, gpa_to_gfn(vmcs12->posted_intr_desc_addr), map)) {
|
|
vmx->nested.pi_desc =
|
|
(struct pi_desc *)(((void *)map->hva) +
|
|
offset_in_page(vmcs12->posted_intr_desc_addr));
|
|
vmcs_write64(POSTED_INTR_DESC_ADDR,
|
|
pfn_to_hpa(map->pfn) + offset_in_page(vmcs12->posted_intr_desc_addr));
|
|
} else {
|
|
/*
|
|
* Defer the KVM_INTERNAL_EXIT until KVM tries to
|
|
* access the contents of the VMCS12 posted interrupt
|
|
* descriptor. (Note that KVM may do this when it
|
|
* should not, per the architectural specification.)
|
|
*/
|
|
vmx->nested.pi_desc = NULL;
|
|
pin_controls_clearbit(vmx, PIN_BASED_POSTED_INTR);
|
|
}
|
|
}
|
|
if (nested_vmx_prepare_msr_bitmap(vcpu, vmcs12))
|
|
exec_controls_setbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
|
|
else
|
|
exec_controls_clearbit(vmx, CPU_BASED_USE_MSR_BITMAPS);
|
|
|
|
return true;
|
|
}
|
|
|
|
static bool vmx_get_nested_state_pages(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (!nested_get_evmcs_page(vcpu)) {
|
|
pr_debug_ratelimited("%s: enlightened vmptrld failed\n",
|
|
__func__);
|
|
vcpu->run->exit_reason = KVM_EXIT_INTERNAL_ERROR;
|
|
vcpu->run->internal.suberror =
|
|
KVM_INTERNAL_ERROR_EMULATION;
|
|
vcpu->run->internal.ndata = 0;
|
|
|
|
return false;
|
|
}
|
|
|
|
if (is_guest_mode(vcpu) && !nested_get_vmcs12_pages(vcpu))
|
|
return false;
|
|
|
|
return true;
|
|
}
|
|
|
|
/*
 * Log @gpa (with its low 12 bits cleared) into the PML buffer described by
 * vmcs12, at the slot selected by guest_pml_index, and decrement the index
 * if the write succeeded.
 *
 * Returns 1 if the buffer is full (guest_pml_index >= PML_ENTITY_NUM, or
 * pml_full was unexpectedly already set), 0 in every other case, including
 * when PML is not enabled in vmcs12 or the guest write fails.
 */
static int nested_vmx_write_pml_buffer(struct kvm_vcpu *vcpu, gpa_t gpa)
{
	struct vcpu_vmx *vmx = to_vmx(vcpu);
	struct vmcs12 *vmcs12;
	gpa_t slot;

	if (WARN_ON_ONCE(!is_guest_mode(vcpu)))
		return 0;

	if (WARN_ON_ONCE(vmx->nested.pml_full))
		return 1;

	/*
	 * Check whether PML is enabled for the nested guest.  Whether EPTP
	 * bit 6 is set has already been checked as part of A/D emulation.
	 */
	vmcs12 = get_vmcs12(vcpu);
	if (!nested_cpu_has_pml(vmcs12))
		return 0;

	if (vmcs12->guest_pml_index >= PML_ENTITY_NUM) {
		vmx->nested.pml_full = true;
		return 1;
	}

	gpa &= ~0xFFFull;
	slot = vmcs12->pml_address + sizeof(u64) * vmcs12->guest_pml_index;

	/* Only consume the slot if the entry actually made it to memory. */
	if (!kvm_write_guest_page(vcpu->kvm, gpa_to_gfn(slot), &gpa,
				  offset_in_page(slot), sizeof(gpa)))
		vmcs12->guest_pml_index--;

	return 0;
}
|
|
|
|
/*
|
|
* Intel's VMX Instruction Reference specifies a common set of prerequisites
|
|
* for running VMX instructions (except VMXON, whose prerequisites are
|
|
* slightly different). It also specifies what exception to inject otherwise.
|
|
* Note that many of these exceptions have priority over VM exits, so they
|
|
* don't have to be checked again here.
|
|
*/
|
|
static int nested_vmx_check_permission(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (!to_vmx(vcpu)->nested.vmxon) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 0;
|
|
}
|
|
|
|
if (vmx_get_cpl(vcpu)) {
|
|
kvm_inject_gp(vcpu, 0);
|
|
return 0;
|
|
}
|
|
|
|
return 1;
|
|
}
|
|
|
|
static u8 vmx_has_apicv_interrupt(struct kvm_vcpu *vcpu)
|
|
{
|
|
u8 rvi = vmx_get_rvi();
|
|
u8 vppr = kvm_lapic_get_reg(vcpu->arch.apic, APIC_PROCPRI);
|
|
|
|
return ((rvi & 0xf0) > (vppr & 0xf0));
|
|
}
|
|
|
|
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12);
|
|
|
|
/*
|
|
* If from_vmentry is false, this is being called from state restore (either RSM
|
|
* or KVM_SET_NESTED_STATE). Otherwise it's called from vmlaunch/vmresume.
|
|
*
|
|
* Returns:
|
|
* NVMX_VMENTRY_SUCCESS: Entered VMX non-root mode
|
|
* NVMX_VMENTRY_VMFAIL: Consistency check VMFail
|
|
* NVMX_VMENTRY_VMEXIT: Consistency check VMExit
|
|
* NVMX_VMENTRY_KVM_INTERNAL_ERROR: KVM internal error
|
|
*/
|
|
enum nvmx_vmentry_status nested_vmx_enter_non_root_mode(struct kvm_vcpu *vcpu,
|
|
bool from_vmentry)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
enum vm_entry_failure_code entry_failure_code;
|
|
bool evaluate_pending_interrupts;
|
|
union vmx_exit_reason exit_reason = {
|
|
.basic = EXIT_REASON_INVALID_STATE,
|
|
.failed_vmentry = 1,
|
|
};
|
|
u32 failed_index;
|
|
|
|
kvm_service_local_tlb_flush_requests(vcpu);
|
|
|
|
evaluate_pending_interrupts = exec_controls_get(vmx) &
|
|
(CPU_BASED_INTR_WINDOW_EXITING | CPU_BASED_NMI_WINDOW_EXITING);
|
|
if (likely(!evaluate_pending_interrupts) && kvm_vcpu_apicv_active(vcpu))
|
|
evaluate_pending_interrupts |= vmx_has_apicv_interrupt(vcpu);
|
|
|
|
if (!vmx->nested.nested_run_pending ||
|
|
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS))
|
|
vmx->nested.vmcs01_debugctl = vmcs_read64(GUEST_IA32_DEBUGCTL);
|
|
if (kvm_mpx_supported() &&
|
|
(!vmx->nested.nested_run_pending ||
|
|
!(vmcs12->vm_entry_controls & VM_ENTRY_LOAD_BNDCFGS)))
|
|
vmx->nested.vmcs01_guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
|
|
|
|
/*
|
|
* Overwrite vmcs01.GUEST_CR3 with L1's CR3 if EPT is disabled *and*
|
|
* nested early checks are disabled. In the event of a "late" VM-Fail,
|
|
* i.e. a VM-Fail detected by hardware but not KVM, KVM must unwind its
|
|
* software model to the pre-VMEntry host state. When EPT is disabled,
|
|
* GUEST_CR3 holds KVM's shadow CR3, not L1's "real" CR3, which causes
|
|
* nested_vmx_restore_host_state() to corrupt vcpu->arch.cr3. Stuffing
|
|
* vmcs01.GUEST_CR3 results in the unwind naturally setting arch.cr3 to
|
|
* the correct value. Smashing vmcs01.GUEST_CR3 is safe because nested
|
|
* VM-Exits, and the unwind, reset KVM's MMU, i.e. vmcs01.GUEST_CR3 is
|
|
* guaranteed to be overwritten with a shadow CR3 prior to re-entering
|
|
* L1. Don't stuff vmcs01.GUEST_CR3 when using nested early checks as
|
|
* KVM modifies vcpu->arch.cr3 if and only if the early hardware checks
|
|
* pass, and early VM-Fails do not reset KVM's MMU, i.e. the VM-Fail
|
|
* path would need to manually save/restore vmcs01.GUEST_CR3.
|
|
*/
|
|
if (!enable_ept && !nested_early_check)
|
|
vmcs_writel(GUEST_CR3, vcpu->arch.cr3);
|
|
|
|
vmx_switch_vmcs(vcpu, &vmx->nested.vmcs02);
|
|
|
|
prepare_vmcs02_early(vmx, &vmx->vmcs01, vmcs12);
|
|
|
|
if (from_vmentry) {
|
|
if (unlikely(!nested_get_vmcs12_pages(vcpu))) {
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
return NVMX_VMENTRY_KVM_INTERNAL_ERROR;
|
|
}
|
|
|
|
if (nested_vmx_check_vmentry_hw(vcpu)) {
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
return NVMX_VMENTRY_VMFAIL;
|
|
}
|
|
|
|
if (nested_vmx_check_guest_state(vcpu, vmcs12,
|
|
&entry_failure_code)) {
|
|
exit_reason.basic = EXIT_REASON_INVALID_STATE;
|
|
vmcs12->exit_qualification = entry_failure_code;
|
|
goto vmentry_fail_vmexit;
|
|
}
|
|
}
|
|
|
|
enter_guest_mode(vcpu);
|
|
|
|
if (prepare_vmcs02(vcpu, vmcs12, from_vmentry, &entry_failure_code)) {
|
|
exit_reason.basic = EXIT_REASON_INVALID_STATE;
|
|
vmcs12->exit_qualification = entry_failure_code;
|
|
goto vmentry_fail_vmexit_guest_mode;
|
|
}
|
|
|
|
if (from_vmentry) {
|
|
failed_index = nested_vmx_load_msr(vcpu,
|
|
vmcs12->vm_entry_msr_load_addr,
|
|
vmcs12->vm_entry_msr_load_count);
|
|
if (failed_index) {
|
|
exit_reason.basic = EXIT_REASON_MSR_LOAD_FAIL;
|
|
vmcs12->exit_qualification = failed_index;
|
|
goto vmentry_fail_vmexit_guest_mode;
|
|
}
|
|
} else {
|
|
/*
|
|
* The MMU is not initialized to point at the right entities yet and
|
|
* "get pages" would need to read data from the guest (i.e. we will
|
|
* need to perform gpa to hpa translation). Request a call
|
|
* to nested_get_vmcs12_pages before the next VM-entry. The MSRs
|
|
* have already been set at vmentry time and should not be reset.
|
|
*/
|
|
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
|
|
}
|
|
|
|
/*
|
|
* If L1 had a pending IRQ/NMI until it executed
|
|
* VMLAUNCH/VMRESUME which wasn't delivered because it was
|
|
* disallowed (e.g. interrupts disabled), L0 needs to
|
|
* evaluate if this pending event should cause an exit from L2
|
|
* to L1 or delivered directly to L2 (e.g. In case L1 don't
|
|
* intercept EXTERNAL_INTERRUPT).
|
|
*
|
|
* Usually this would be handled by the processor noticing an
|
|
* IRQ/NMI window request, or checking RVI during evaluation of
|
|
* pending virtual interrupts. However, this setting was done
|
|
* on VMCS01 and now VMCS02 is active instead. Thus, we force L0
|
|
* to perform pending event evaluation by requesting a KVM_REQ_EVENT.
|
|
*/
|
|
if (unlikely(evaluate_pending_interrupts))
|
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
|
/*
|
|
* Do not start the preemption timer hrtimer until after we know
|
|
* we are successful, so that only nested_vmx_vmexit needs to cancel
|
|
* the timer.
|
|
*/
|
|
vmx->nested.preemption_timer_expired = false;
|
|
if (nested_cpu_has_preemption_timer(vmcs12)) {
|
|
u64 timer_value = vmx_calc_preemption_timer_value(vcpu);
|
|
vmx_start_preemption_timer(vcpu, timer_value);
|
|
}
|
|
|
|
/*
|
|
* Note no nested_vmx_succeed or nested_vmx_fail here. At this point
|
|
* we are no longer running L1, and VMLAUNCH/VMRESUME has not yet
|
|
* returned as far as L1 is concerned. It will only return (and set
|
|
* the success flag) when L2 exits (see nested_vmx_vmexit()).
|
|
*/
|
|
return NVMX_VMENTRY_SUCCESS;
|
|
|
|
/*
|
|
* A failed consistency check that leads to a VMExit during L1's
|
|
* VMEnter to L2 is a variation of a normal VMexit, as explained in
|
|
* 26.7 "VM-entry failures during or after loading guest state".
|
|
*/
|
|
vmentry_fail_vmexit_guest_mode:
|
|
if (vmcs12->cpu_based_vm_exec_control & CPU_BASED_USE_TSC_OFFSETTING)
|
|
vcpu->arch.tsc_offset -= vmcs12->tsc_offset;
|
|
leave_guest_mode(vcpu);
|
|
|
|
vmentry_fail_vmexit:
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
|
|
if (!from_vmentry)
|
|
return NVMX_VMENTRY_VMEXIT;
|
|
|
|
load_vmcs12_host_state(vcpu, vmcs12);
|
|
vmcs12->vm_exit_reason = exit_reason.full;
|
|
if (enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
vmx->nested.need_vmcs12_to_shadow_sync = true;
|
|
return NVMX_VMENTRY_VMEXIT;
|
|
}
|
|
|
|
/*
|
|
* nested_vmx_run() handles a nested entry, i.e., a VMLAUNCH or VMRESUME on L1
|
|
* for running an L2 nested guest.
|
|
*/
|
|
static int nested_vmx_run(struct kvm_vcpu *vcpu, bool launch)
|
|
{
|
|
struct vmcs12 *vmcs12;
|
|
enum nvmx_vmentry_status status;
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u32 interrupt_shadow = vmx_get_interrupt_shadow(vcpu);
|
|
enum nested_evmptrld_status evmptrld_status;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
evmptrld_status = nested_vmx_handle_enlightened_vmptrld(vcpu, launch);
|
|
if (evmptrld_status == EVMPTRLD_ERROR) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
} else if (CC(evmptrld_status == EVMPTRLD_VMFAIL)) {
|
|
return nested_vmx_failInvalid(vcpu);
|
|
}
|
|
|
|
if (CC(!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr) &&
|
|
vmx->nested.current_vmptr == -1ull))
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
vmcs12 = get_vmcs12(vcpu);
|
|
|
|
/*
|
|
* Can't VMLAUNCH or VMRESUME a shadow VMCS. Despite the fact
|
|
* that there *is* a valid VMCS pointer, RFLAGS.CF is set
|
|
* rather than RFLAGS.ZF, and no error number is stored to the
|
|
* VM-instruction error field.
|
|
*/
|
|
if (CC(vmcs12->hdr.shadow_vmcs))
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)) {
|
|
copy_enlightened_to_vmcs12(vmx, vmx->nested.hv_evmcs->hv_clean_fields);
|
|
/* Enlightened VMCS doesn't have launch state */
|
|
vmcs12->launch_state = !launch;
|
|
} else if (enable_shadow_vmcs) {
|
|
copy_shadow_to_vmcs12(vmx);
|
|
}
|
|
|
|
/*
|
|
* The nested entry process starts with enforcing various prerequisites
|
|
* on vmcs12 as required by the Intel SDM, and act appropriately when
|
|
* they fail: As the SDM explains, some conditions should cause the
|
|
* instruction to fail, while others will cause the instruction to seem
|
|
* to succeed, but return an EXIT_REASON_INVALID_STATE.
|
|
* To speed up the normal (success) code path, we should avoid checking
|
|
* for misconfigurations which will anyway be caught by the processor
|
|
* when using the merged vmcs02.
|
|
*/
|
|
if (CC(interrupt_shadow & KVM_X86_SHADOW_INT_MOV_SS))
|
|
return nested_vmx_fail(vcpu, VMXERR_ENTRY_EVENTS_BLOCKED_BY_MOV_SS);
|
|
|
|
if (CC(vmcs12->launch_state == launch))
|
|
return nested_vmx_fail(vcpu,
|
|
launch ? VMXERR_VMLAUNCH_NONCLEAR_VMCS
|
|
: VMXERR_VMRESUME_NONLAUNCHED_VMCS);
|
|
|
|
if (nested_vmx_check_controls(vcpu, vmcs12))
|
|
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
|
|
|
if (nested_vmx_check_address_space_size(vcpu, vmcs12))
|
|
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
|
|
|
|
if (nested_vmx_check_host_state(vcpu, vmcs12))
|
|
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_HOST_STATE_FIELD);
|
|
|
|
/*
|
|
* We're finally done with prerequisite checking, and can start with
|
|
* the nested entry.
|
|
*/
|
|
vmx->nested.nested_run_pending = 1;
|
|
vmx->nested.has_preemption_timer_deadline = false;
|
|
status = nested_vmx_enter_non_root_mode(vcpu, true);
|
|
if (unlikely(status != NVMX_VMENTRY_SUCCESS))
|
|
goto vmentry_failed;
|
|
|
|
/* Emulate processing of posted interrupts on VM-Enter. */
|
|
if (nested_cpu_has_posted_intr(vmcs12) &&
|
|
kvm_apic_has_interrupt(vcpu) == vmx->nested.posted_intr_nv) {
|
|
vmx->nested.pi_pending = true;
|
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
kvm_apic_clear_irr(vcpu, vmx->nested.posted_intr_nv);
|
|
}
|
|
|
|
/* Hide L1D cache contents from the nested guest. */
|
|
vmx->vcpu.arch.l1tf_flush_l1d = true;
|
|
|
|
/*
|
|
* Must happen outside of nested_vmx_enter_non_root_mode() as it will
|
|
* also be used as part of restoring nVMX state for
|
|
* snapshot restore (migration).
|
|
*
|
|
* In this flow, it is assumed that vmcs12 cache was
|
|
* transferred as part of captured nVMX state and should
|
|
* therefore not be read from guest memory (which may not
|
|
* exist on destination host yet).
|
|
*/
|
|
nested_cache_shadow_vmcs12(vcpu, vmcs12);
|
|
|
|
switch (vmcs12->guest_activity_state) {
|
|
case GUEST_ACTIVITY_HLT:
|
|
/*
|
|
* If we're entering a halted L2 vcpu and the L2 vcpu won't be
|
|
* awakened by event injection or by an NMI-window VM-exit or
|
|
* by an interrupt-window VM-exit, halt the vcpu.
|
|
*/
|
|
if (!(vmcs12->vm_entry_intr_info_field & INTR_INFO_VALID_MASK) &&
|
|
!nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING) &&
|
|
!(nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING) &&
|
|
(vmcs12->guest_rflags & X86_EFLAGS_IF))) {
|
|
vmx->nested.nested_run_pending = 0;
|
|
return kvm_vcpu_halt(vcpu);
|
|
}
|
|
break;
|
|
case GUEST_ACTIVITY_WAIT_SIPI:
|
|
vmx->nested.nested_run_pending = 0;
|
|
vcpu->arch.mp_state = KVM_MP_STATE_INIT_RECEIVED;
|
|
break;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return 1;
|
|
|
|
vmentry_failed:
|
|
vmx->nested.nested_run_pending = 0;
|
|
if (status == NVMX_VMENTRY_KVM_INTERNAL_ERROR)
|
|
return 0;
|
|
if (status == NVMX_VMENTRY_VMEXIT)
|
|
return 1;
|
|
WARN_ON_ONCE(status != NVMX_VMENTRY_VMFAIL);
|
|
return nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
|
}
|
|
|
|
/*
|
|
* On a nested exit from L2 to L1, vmcs12.guest_cr0 might not be up-to-date
|
|
* because L2 may have changed some cr0 bits directly (CR0_GUEST_HOST_MASK).
|
|
* This function returns the new value we should put in vmcs12.guest_cr0.
|
|
* It's not enough to just return the vmcs02 GUEST_CR0. Rather,
|
|
* 1. Bits that neither L0 nor L1 trapped, were set directly by L2 and are now
|
|
* available in vmcs02 GUEST_CR0. (Note: It's enough to check that L0
|
|
* didn't trap the bit, because if L1 did, so would L0).
|
|
* 2. Bits that L1 asked to trap (and therefore L0 also did) could not have
|
|
* been modified by L2, and L1 knows it. So just leave the old value of
|
|
* the bit from vmcs12.guest_cr0. Note that the bit from vmcs02 GUEST_CR0
|
|
* isn't relevant, because if L0 traps this bit it can set it to anything.
|
|
* 3. Bits that L1 didn't trap, but L0 did. L1 believes the guest could have
|
|
* changed these bits, and therefore they need to be updated, but L0
|
|
* didn't necessarily allow them to be changed in GUEST_CR0 - and rather
|
|
* put them in vmcs02 CR0_READ_SHADOW. So take these bits from there.
|
|
*/
|
|
static inline unsigned long
|
|
vmcs12_guest_cr0(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
|
|
{
|
|
return
|
|
/*1*/ (vmcs_readl(GUEST_CR0) & vcpu->arch.cr0_guest_owned_bits) |
|
|
/*2*/ (vmcs12->guest_cr0 & vmcs12->cr0_guest_host_mask) |
|
|
/*3*/ (vmcs_readl(CR0_READ_SHADOW) & ~(vmcs12->cr0_guest_host_mask |
|
|
vcpu->arch.cr0_guest_owned_bits));
|
|
}
|
|
|
|
static inline unsigned long
|
|
vmcs12_guest_cr4(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
|
|
{
|
|
return
|
|
/*1*/ (vmcs_readl(GUEST_CR4) & vcpu->arch.cr4_guest_owned_bits) |
|
|
/*2*/ (vmcs12->guest_cr4 & vmcs12->cr4_guest_host_mask) |
|
|
/*3*/ (vmcs_readl(CR4_READ_SHADOW) & ~(vmcs12->cr4_guest_host_mask |
|
|
vcpu->arch.cr4_guest_owned_bits));
|
|
}
|
|
|
|
/*
 * Fill in vmcs12's IDT-vectoring information for an exit described by
 * @vm_exit_reason and @exit_intr_info, based on which event (exception, NMI
 * or interrupt) is currently marked as injected in @vcpu.  If none is, or
 * the exit is a triple/double fault, the field is cleared.
 */
static void vmcs12_save_pending_event(struct kvm_vcpu *vcpu,
				      struct vmcs12 *vmcs12,
				      u32 vm_exit_reason, u32 exit_intr_info)
{
	unsigned int vector;
	u32 info;

	/*
	 * Per the SDM, VM-Exits due to double and triple faults are never
	 * considered to occur during event delivery, even if the
	 * double/triple fault is the result of an escalating vectoring issue.
	 *
	 * Note, the SDM qualifies the double fault behavior with "The
	 * original event results in a double-fault exception".  It is unclear
	 * why the qualification exists, since exits due to double fault can
	 * only occur while vectoring a different exception (injected events
	 * are never subject to interception), i.e. there is _always_ an
	 * original event.
	 *
	 * The SDM also uses NMI as a confusing example for the "original
	 * event causes the VM exit directly" clause.  NMI is not special in
	 * any way; the same rule applies to every event that causes an exit
	 * directly.  NMI is an odd choice for the example because NMIs can
	 * only occur on instruction boundaries, i.e. they _can't_ occur
	 * during vectoring.
	 */
	if ((u16)vm_exit_reason == EXIT_REASON_TRIPLE_FAULT ||
	    ((u16)vm_exit_reason == EXIT_REASON_EXCEPTION_NMI &&
	     is_double_fault(exit_intr_info))) {
		vmcs12->idt_vectoring_info_field = 0;
		return;
	}

	if (vcpu->arch.exception.injected) {
		vector = vcpu->arch.exception.nr;
		info = vector | VECTORING_INFO_VALID_MASK;

		if (kvm_exception_is_soft(vector)) {
			vmcs12->vm_exit_instruction_len =
				vcpu->arch.event_exit_inst_len;
			info |= INTR_TYPE_SOFT_EXCEPTION;
		} else {
			info |= INTR_TYPE_HARD_EXCEPTION;
		}

		if (vcpu->arch.exception.has_error_code) {
			info |= VECTORING_INFO_DELIVER_CODE_MASK;
			vmcs12->idt_vectoring_error_code =
				vcpu->arch.exception.error_code;
		}

		vmcs12->idt_vectoring_info_field = info;
		return;
	}

	if (vcpu->arch.nmi_injected) {
		vmcs12->idt_vectoring_info_field =
			INTR_TYPE_NMI_INTR | INTR_INFO_VALID_MASK | NMI_VECTOR;
		return;
	}

	if (vcpu->arch.interrupt.injected) {
		vector = vcpu->arch.interrupt.nr;
		info = vector | VECTORING_INFO_VALID_MASK;

		if (vcpu->arch.interrupt.soft) {
			info |= INTR_TYPE_SOFT_INTR;
			/*
			 * NOTE(review): the length is stored in the VM-*entry*
			 * instruction length field here, whereas the soft
			 * exception path above uses the VM-*exit* field --
			 * confirm this asymmetry is intended.
			 */
			vmcs12->vm_entry_instruction_len =
				vcpu->arch.event_exit_inst_len;
		} else {
			info |= INTR_TYPE_EXT_INTR;
		}

		vmcs12->idt_vectoring_info_field = info;
		return;
	}

	/* Nothing was being delivered. */
	vmcs12->idt_vectoring_info_field = 0;
}
|
|
|
|
|
|
void nested_mark_vmcs12_pages_dirty(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
gfn_t gfn;
|
|
|
|
/*
|
|
* Don't need to mark the APIC access page dirty; it is never
|
|
* written to by the CPU during APIC virtualization.
|
|
*/
|
|
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW)) {
|
|
gfn = vmcs12->virtual_apic_page_addr >> PAGE_SHIFT;
|
|
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
|
}
|
|
|
|
if (nested_cpu_has_posted_intr(vmcs12)) {
|
|
gfn = vmcs12->posted_intr_desc_addr >> PAGE_SHIFT;
|
|
kvm_vcpu_mark_page_dirty(vcpu, gfn);
|
|
}
|
|
}
|
|
|
|
static int vmx_complete_nested_posted_interrupt(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
int max_irr;
|
|
void *vapic_page;
|
|
u16 status;
|
|
|
|
if (!vmx->nested.pi_pending)
|
|
return 0;
|
|
|
|
if (!vmx->nested.pi_desc)
|
|
goto mmio_needed;
|
|
|
|
vmx->nested.pi_pending = false;
|
|
|
|
if (!pi_test_and_clear_on(vmx->nested.pi_desc))
|
|
return 0;
|
|
|
|
max_irr = find_last_bit((unsigned long *)vmx->nested.pi_desc->pir, 256);
|
|
if (max_irr != 256) {
|
|
vapic_page = vmx->nested.virtual_apic_map.hva;
|
|
if (!vapic_page)
|
|
goto mmio_needed;
|
|
|
|
__kvm_apic_update_irr(vmx->nested.pi_desc->pir,
|
|
vapic_page, &max_irr);
|
|
status = vmcs_read16(GUEST_INTR_STATUS);
|
|
if ((u8)max_irr > ((u8)status & 0xff)) {
|
|
status &= ~0xff;
|
|
status |= (u8)max_irr;
|
|
vmcs_write16(GUEST_INTR_STATUS, status);
|
|
}
|
|
}
|
|
|
|
nested_mark_vmcs12_pages_dirty(vcpu);
|
|
return 0;
|
|
|
|
mmio_needed:
|
|
kvm_handle_memory_failure(vcpu, X86EMUL_IO_NEEDED, NULL);
|
|
return -ENXIO;
|
|
}
|
|
|
|
static void nested_vmx_inject_exception_vmexit(struct kvm_vcpu *vcpu,
|
|
unsigned long exit_qual)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
unsigned int nr = vcpu->arch.exception.nr;
|
|
u32 intr_info = nr | INTR_INFO_VALID_MASK;
|
|
|
|
if (vcpu->arch.exception.has_error_code) {
|
|
/*
|
|
* Intel CPUs do not generate error codes with bits 31:16 set,
|
|
* and more importantly VMX disallows setting bits 31:16 in the
|
|
* injected error code for VM-Entry. Drop the bits to mimic
|
|
* hardware and avoid inducing failure on nested VM-Entry if L1
|
|
* chooses to inject the exception back to L2. AMD CPUs _do_
|
|
* generate "full" 32-bit error codes, so KVM allows userspace
|
|
* to inject exception error codes with bits 31:16 set.
|
|
*/
|
|
vmcs12->vm_exit_intr_error_code = (u16)vcpu->arch.exception.error_code;
|
|
intr_info |= INTR_INFO_DELIVER_CODE_MASK;
|
|
}
|
|
|
|
if (kvm_exception_is_soft(nr))
|
|
intr_info |= INTR_TYPE_SOFT_EXCEPTION;
|
|
else
|
|
intr_info |= INTR_TYPE_HARD_EXCEPTION;
|
|
|
|
if (!(vmcs12->idt_vectoring_info_field & VECTORING_INFO_VALID_MASK) &&
|
|
vmx_get_nmi_mask(vcpu))
|
|
intr_info |= INTR_INFO_UNBLOCK_NMI;
|
|
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI, intr_info, exit_qual);
|
|
}
|
|
|
|
/*
|
|
* Returns true if a debug trap is pending delivery.
|
|
*
|
|
* In KVM, debug traps bear an exception payload. As such, the class of a #DB
|
|
* exception may be inferred from the presence of an exception payload.
|
|
*/
|
|
static inline bool vmx_pending_dbg_trap(struct kvm_vcpu *vcpu)
|
|
{
|
|
return vcpu->arch.exception.pending &&
|
|
vcpu->arch.exception.nr == DB_VECTOR &&
|
|
vcpu->arch.exception.payload;
|
|
}
|
|
|
|
/*
|
|
* Certain VM-exits set the 'pending debug exceptions' field to indicate a
|
|
* recognized #DB (data or single-step) that has yet to be delivered. Since KVM
|
|
* represents these debug traps with a payload that is said to be compatible
|
|
* with the 'pending debug exceptions' field, write the payload to the VMCS
|
|
* field if a VM-exit is delivered before the debug trap.
|
|
*/
|
|
static void nested_vmx_update_pending_dbg(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (vmx_pending_dbg_trap(vcpu))
|
|
vmcs_writel(GUEST_PENDING_DBG_EXCEPTIONS,
|
|
vcpu->arch.exception.payload);
|
|
}
|
|
|
|
static bool nested_vmx_preemption_timer_pending(struct kvm_vcpu *vcpu)
|
|
{
|
|
return nested_cpu_has_preemption_timer(get_vmcs12(vcpu)) &&
|
|
to_vmx(vcpu)->nested.preemption_timer_expired;
|
|
}
|
|
|
|
static int vmx_check_nested_events(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
unsigned long exit_qual;
|
|
bool block_nested_events =
|
|
vmx->nested.nested_run_pending || kvm_event_needs_reinjection(vcpu);
|
|
bool mtf_pending = vmx->nested.mtf_pending;
|
|
struct kvm_lapic *apic = vcpu->arch.apic;
|
|
|
|
/*
|
|
* Clear the MTF state. If a higher priority VM-exit is delivered first,
|
|
* this state is discarded.
|
|
*/
|
|
if (!block_nested_events)
|
|
vmx->nested.mtf_pending = false;
|
|
|
|
if (lapic_in_kernel(vcpu) &&
|
|
test_bit(KVM_APIC_INIT, &apic->pending_events)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
nested_vmx_update_pending_dbg(vcpu);
|
|
clear_bit(KVM_APIC_INIT, &apic->pending_events);
|
|
if (vcpu->arch.mp_state != KVM_MP_STATE_INIT_RECEIVED)
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_INIT_SIGNAL, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
if (lapic_in_kernel(vcpu) &&
|
|
test_bit(KVM_APIC_SIPI, &apic->pending_events)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
|
|
clear_bit(KVM_APIC_SIPI, &apic->pending_events);
|
|
if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_SIPI_SIGNAL, 0,
|
|
apic->sipi_vector & 0xFFUL);
|
|
return 0;
|
|
}
|
|
|
|
/*
|
|
* Process any exceptions that are not debug traps before MTF.
|
|
*
|
|
* Note that only a pending nested run can block a pending exception.
|
|
* Otherwise an injected NMI/interrupt should either be
|
|
* lost or delivered to the nested hypervisor in the IDT_VECTORING_INFO,
|
|
* while delivering the pending exception.
|
|
*/
|
|
|
|
if (vcpu->arch.exception.pending && !vmx_pending_dbg_trap(vcpu)) {
|
|
if (vmx->nested.nested_run_pending)
|
|
return -EBUSY;
|
|
if (!nested_vmx_check_exception(vcpu, &exit_qual))
|
|
goto no_vmexit;
|
|
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
|
|
return 0;
|
|
}
|
|
|
|
if (mtf_pending) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
nested_vmx_update_pending_dbg(vcpu);
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_MONITOR_TRAP_FLAG, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
if (vcpu->arch.exception.pending) {
|
|
if (vmx->nested.nested_run_pending)
|
|
return -EBUSY;
|
|
if (!nested_vmx_check_exception(vcpu, &exit_qual))
|
|
goto no_vmexit;
|
|
nested_vmx_inject_exception_vmexit(vcpu, exit_qual);
|
|
return 0;
|
|
}
|
|
|
|
if (nested_vmx_preemption_timer_pending(vcpu)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_PREEMPTION_TIMER, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
if (vcpu->arch.smi_pending && !is_smm(vcpu)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
goto no_vmexit;
|
|
}
|
|
|
|
if (vcpu->arch.nmi_pending && !vmx_nmi_blocked(vcpu)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
if (!nested_exit_on_nmi(vcpu))
|
|
goto no_vmexit;
|
|
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_EXCEPTION_NMI,
|
|
NMI_VECTOR | INTR_TYPE_NMI_INTR |
|
|
INTR_INFO_VALID_MASK, 0);
|
|
/*
|
|
* The NMI-triggered VM exit counts as injection:
|
|
* clear this one and block further NMIs.
|
|
*/
|
|
vcpu->arch.nmi_pending = 0;
|
|
vmx_set_nmi_mask(vcpu, true);
|
|
return 0;
|
|
}
|
|
|
|
if (kvm_cpu_has_interrupt(vcpu) && !vmx_interrupt_blocked(vcpu)) {
|
|
if (block_nested_events)
|
|
return -EBUSY;
|
|
if (!nested_exit_on_intr(vcpu))
|
|
goto no_vmexit;
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_EXTERNAL_INTERRUPT, 0, 0);
|
|
return 0;
|
|
}
|
|
|
|
no_vmexit:
|
|
return vmx_complete_nested_posted_interrupt(vcpu);
|
|
}
|
|
|
|
static u32 vmx_get_preemption_timer_value(struct kvm_vcpu *vcpu)
|
|
{
|
|
ktime_t remaining =
|
|
hrtimer_get_remaining(&to_vmx(vcpu)->nested.preemption_timer);
|
|
u64 value;
|
|
|
|
if (ktime_to_ns(remaining) <= 0)
|
|
return 0;
|
|
|
|
value = ktime_to_ns(remaining) * vcpu->arch.virtual_tsc_khz;
|
|
do_div(value, 1000000);
|
|
return value >> VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE;
|
|
}
|
|
|
|
static bool is_vmcs12_ext_field(unsigned long field)
|
|
{
|
|
switch (field) {
|
|
case GUEST_ES_SELECTOR:
|
|
case GUEST_CS_SELECTOR:
|
|
case GUEST_SS_SELECTOR:
|
|
case GUEST_DS_SELECTOR:
|
|
case GUEST_FS_SELECTOR:
|
|
case GUEST_GS_SELECTOR:
|
|
case GUEST_LDTR_SELECTOR:
|
|
case GUEST_TR_SELECTOR:
|
|
case GUEST_ES_LIMIT:
|
|
case GUEST_CS_LIMIT:
|
|
case GUEST_SS_LIMIT:
|
|
case GUEST_DS_LIMIT:
|
|
case GUEST_FS_LIMIT:
|
|
case GUEST_GS_LIMIT:
|
|
case GUEST_LDTR_LIMIT:
|
|
case GUEST_TR_LIMIT:
|
|
case GUEST_GDTR_LIMIT:
|
|
case GUEST_IDTR_LIMIT:
|
|
case GUEST_ES_AR_BYTES:
|
|
case GUEST_DS_AR_BYTES:
|
|
case GUEST_FS_AR_BYTES:
|
|
case GUEST_GS_AR_BYTES:
|
|
case GUEST_LDTR_AR_BYTES:
|
|
case GUEST_TR_AR_BYTES:
|
|
case GUEST_ES_BASE:
|
|
case GUEST_CS_BASE:
|
|
case GUEST_SS_BASE:
|
|
case GUEST_DS_BASE:
|
|
case GUEST_FS_BASE:
|
|
case GUEST_GS_BASE:
|
|
case GUEST_LDTR_BASE:
|
|
case GUEST_TR_BASE:
|
|
case GUEST_GDTR_BASE:
|
|
case GUEST_IDTR_BASE:
|
|
case GUEST_PENDING_DBG_EXCEPTIONS:
|
|
case GUEST_BNDCFGS:
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static void sync_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
vmcs12->guest_es_selector = vmcs_read16(GUEST_ES_SELECTOR);
|
|
vmcs12->guest_cs_selector = vmcs_read16(GUEST_CS_SELECTOR);
|
|
vmcs12->guest_ss_selector = vmcs_read16(GUEST_SS_SELECTOR);
|
|
vmcs12->guest_ds_selector = vmcs_read16(GUEST_DS_SELECTOR);
|
|
vmcs12->guest_fs_selector = vmcs_read16(GUEST_FS_SELECTOR);
|
|
vmcs12->guest_gs_selector = vmcs_read16(GUEST_GS_SELECTOR);
|
|
vmcs12->guest_ldtr_selector = vmcs_read16(GUEST_LDTR_SELECTOR);
|
|
vmcs12->guest_tr_selector = vmcs_read16(GUEST_TR_SELECTOR);
|
|
vmcs12->guest_es_limit = vmcs_read32(GUEST_ES_LIMIT);
|
|
vmcs12->guest_cs_limit = vmcs_read32(GUEST_CS_LIMIT);
|
|
vmcs12->guest_ss_limit = vmcs_read32(GUEST_SS_LIMIT);
|
|
vmcs12->guest_ds_limit = vmcs_read32(GUEST_DS_LIMIT);
|
|
vmcs12->guest_fs_limit = vmcs_read32(GUEST_FS_LIMIT);
|
|
vmcs12->guest_gs_limit = vmcs_read32(GUEST_GS_LIMIT);
|
|
vmcs12->guest_ldtr_limit = vmcs_read32(GUEST_LDTR_LIMIT);
|
|
vmcs12->guest_tr_limit = vmcs_read32(GUEST_TR_LIMIT);
|
|
vmcs12->guest_gdtr_limit = vmcs_read32(GUEST_GDTR_LIMIT);
|
|
vmcs12->guest_idtr_limit = vmcs_read32(GUEST_IDTR_LIMIT);
|
|
vmcs12->guest_es_ar_bytes = vmcs_read32(GUEST_ES_AR_BYTES);
|
|
vmcs12->guest_ds_ar_bytes = vmcs_read32(GUEST_DS_AR_BYTES);
|
|
vmcs12->guest_fs_ar_bytes = vmcs_read32(GUEST_FS_AR_BYTES);
|
|
vmcs12->guest_gs_ar_bytes = vmcs_read32(GUEST_GS_AR_BYTES);
|
|
vmcs12->guest_ldtr_ar_bytes = vmcs_read32(GUEST_LDTR_AR_BYTES);
|
|
vmcs12->guest_tr_ar_bytes = vmcs_read32(GUEST_TR_AR_BYTES);
|
|
vmcs12->guest_es_base = vmcs_readl(GUEST_ES_BASE);
|
|
vmcs12->guest_cs_base = vmcs_readl(GUEST_CS_BASE);
|
|
vmcs12->guest_ss_base = vmcs_readl(GUEST_SS_BASE);
|
|
vmcs12->guest_ds_base = vmcs_readl(GUEST_DS_BASE);
|
|
vmcs12->guest_fs_base = vmcs_readl(GUEST_FS_BASE);
|
|
vmcs12->guest_gs_base = vmcs_readl(GUEST_GS_BASE);
|
|
vmcs12->guest_ldtr_base = vmcs_readl(GUEST_LDTR_BASE);
|
|
vmcs12->guest_tr_base = vmcs_readl(GUEST_TR_BASE);
|
|
vmcs12->guest_gdtr_base = vmcs_readl(GUEST_GDTR_BASE);
|
|
vmcs12->guest_idtr_base = vmcs_readl(GUEST_IDTR_BASE);
|
|
vmcs12->guest_pending_dbg_exceptions =
|
|
vmcs_readl(GUEST_PENDING_DBG_EXCEPTIONS);
|
|
if (kvm_mpx_supported())
|
|
vmcs12->guest_bndcfgs = vmcs_read64(GUEST_BNDCFGS);
|
|
|
|
vmx->nested.need_sync_vmcs02_to_vmcs12_rare = false;
|
|
}
|
|
|
|
static void copy_vmcs02_to_vmcs12_rare(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
int cpu;
|
|
|
|
if (!vmx->nested.need_sync_vmcs02_to_vmcs12_rare)
|
|
return;
|
|
|
|
|
|
WARN_ON_ONCE(vmx->loaded_vmcs != &vmx->vmcs01);
|
|
|
|
cpu = get_cpu();
|
|
vmx->loaded_vmcs = &vmx->nested.vmcs02;
|
|
vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->vmcs01);
|
|
|
|
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
|
|
|
vmx->loaded_vmcs = &vmx->vmcs01;
|
|
vmx_vcpu_load_vmcs(vcpu, cpu, &vmx->nested.vmcs02);
|
|
put_cpu();
|
|
}
|
|
|
|
/*
|
|
* Update the guest state fields of vmcs12 to reflect changes that
|
|
* occurred while L2 was running. (The "IA-32e mode guest" bit of the
|
|
* VM-entry controls is also updated, since this is really a guest
|
|
* state bit.)
|
|
*/
|
|
static void sync_vmcs02_to_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
|
|
|
vmx->nested.need_sync_vmcs02_to_vmcs12_rare =
|
|
!evmptr_is_valid(vmx->nested.hv_evmcs_vmptr);
|
|
|
|
vmcs12->guest_cr0 = vmcs12_guest_cr0(vcpu, vmcs12);
|
|
vmcs12->guest_cr4 = vmcs12_guest_cr4(vcpu, vmcs12);
|
|
|
|
vmcs12->guest_rsp = kvm_rsp_read(vcpu);
|
|
vmcs12->guest_rip = kvm_rip_read(vcpu);
|
|
vmcs12->guest_rflags = vmcs_readl(GUEST_RFLAGS);
|
|
|
|
vmcs12->guest_cs_ar_bytes = vmcs_read32(GUEST_CS_AR_BYTES);
|
|
vmcs12->guest_ss_ar_bytes = vmcs_read32(GUEST_SS_AR_BYTES);
|
|
|
|
vmcs12->guest_interruptibility_info =
|
|
vmcs_read32(GUEST_INTERRUPTIBILITY_INFO);
|
|
|
|
if (vcpu->arch.mp_state == KVM_MP_STATE_HALTED)
|
|
vmcs12->guest_activity_state = GUEST_ACTIVITY_HLT;
|
|
else if (vcpu->arch.mp_state == KVM_MP_STATE_INIT_RECEIVED)
|
|
vmcs12->guest_activity_state = GUEST_ACTIVITY_WAIT_SIPI;
|
|
else
|
|
vmcs12->guest_activity_state = GUEST_ACTIVITY_ACTIVE;
|
|
|
|
if (nested_cpu_has_preemption_timer(vmcs12) &&
|
|
vmcs12->vm_exit_controls & VM_EXIT_SAVE_VMX_PREEMPTION_TIMER &&
|
|
!vmx->nested.nested_run_pending)
|
|
vmcs12->vmx_preemption_timer_value =
|
|
vmx_get_preemption_timer_value(vcpu);
|
|
|
|
/*
|
|
* In some cases (usually, nested EPT), L2 is allowed to change its
|
|
* own CR3 without exiting. If it has changed it, we must keep it.
|
|
* Of course, if L0 is using shadow page tables, GUEST_CR3 was defined
|
|
* by L0, not L1 or L2, so we mustn't unconditionally copy it to vmcs12.
|
|
*
|
|
* Additionally, restore L2's PDPTR to vmcs12.
|
|
*/
|
|
if (enable_ept) {
|
|
vmcs12->guest_cr3 = vmcs_readl(GUEST_CR3);
|
|
if (nested_cpu_has_ept(vmcs12) && is_pae_paging(vcpu)) {
|
|
vmcs12->guest_pdptr0 = vmcs_read64(GUEST_PDPTR0);
|
|
vmcs12->guest_pdptr1 = vmcs_read64(GUEST_PDPTR1);
|
|
vmcs12->guest_pdptr2 = vmcs_read64(GUEST_PDPTR2);
|
|
vmcs12->guest_pdptr3 = vmcs_read64(GUEST_PDPTR3);
|
|
}
|
|
}
|
|
|
|
vmcs12->guest_linear_address = vmcs_readl(GUEST_LINEAR_ADDRESS);
|
|
|
|
if (nested_cpu_has_vid(vmcs12))
|
|
vmcs12->guest_intr_status = vmcs_read16(GUEST_INTR_STATUS);
|
|
|
|
vmcs12->vm_entry_controls =
|
|
(vmcs12->vm_entry_controls & ~VM_ENTRY_IA32E_MODE) |
|
|
(vm_entry_controls_get(to_vmx(vcpu)) & VM_ENTRY_IA32E_MODE);
|
|
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_DEBUG_CONTROLS)
|
|
kvm_get_dr(vcpu, 7, (unsigned long *)&vmcs12->guest_dr7);
|
|
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_SAVE_IA32_EFER)
|
|
vmcs12->guest_ia32_efer = vcpu->arch.efer;
|
|
}
|
|
|
|
/*
|
|
* prepare_vmcs12 is part of what we need to do when the nested L2 guest exits
|
|
* and we want to prepare to run its L1 parent. L1 keeps a vmcs for L2 (vmcs12),
|
|
* and this function updates it to reflect the changes to the guest state while
|
|
* L2 was running (and perhaps made some exits which were handled directly by L0
|
|
* without going back to L1), and to reflect the exit reason.
|
|
* Note that we do not have to copy here all VMCS fields, just those that
|
|
* could have changed by the L2 guest or the exit - i.e., the guest-state and
|
|
* exit-information fields only. Other fields are modified by L1 with VMWRITE,
|
|
* which already writes to vmcs12 directly.
|
|
*/
|
|
static void prepare_vmcs12(struct kvm_vcpu *vcpu, struct vmcs12 *vmcs12,
|
|
u32 vm_exit_reason, u32 exit_intr_info,
|
|
unsigned long exit_qualification)
|
|
{
|
|
/* update exit information fields: */
|
|
vmcs12->vm_exit_reason = vm_exit_reason;
|
|
if (to_vmx(vcpu)->exit_reason.enclave_mode)
|
|
vmcs12->vm_exit_reason |= VMX_EXIT_REASONS_SGX_ENCLAVE_MODE;
|
|
vmcs12->exit_qualification = exit_qualification;
|
|
|
|
/*
|
|
* On VM-Exit due to a failed VM-Entry, the VMCS isn't marked launched
|
|
* and only EXIT_REASON and EXIT_QUALIFICATION are updated, all other
|
|
* exit info fields are unmodified.
|
|
*/
|
|
if (!(vmcs12->vm_exit_reason & VMX_EXIT_REASONS_FAILED_VMENTRY)) {
|
|
vmcs12->launch_state = 1;
|
|
|
|
/* vm_entry_intr_info_field is cleared on exit. Emulate this
|
|
* instead of reading the real value. */
|
|
vmcs12->vm_entry_intr_info_field &= ~INTR_INFO_VALID_MASK;
|
|
|
|
/*
|
|
* Transfer the event that L0 or L1 may wanted to inject into
|
|
* L2 to IDT_VECTORING_INFO_FIELD.
|
|
*/
|
|
vmcs12_save_pending_event(vcpu, vmcs12,
|
|
vm_exit_reason, exit_intr_info);
|
|
|
|
vmcs12->vm_exit_intr_info = exit_intr_info;
|
|
vmcs12->vm_exit_instruction_len = vmcs_read32(VM_EXIT_INSTRUCTION_LEN);
|
|
vmcs12->vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
|
|
/*
|
|
* According to spec, there's no need to store the guest's
|
|
* MSRs if the exit is due to a VM-entry failure that occurs
|
|
* during or after loading the guest state. Since this exit
|
|
* does not fall in that category, we need to save the MSRs.
|
|
*/
|
|
if (nested_vmx_store_msr(vcpu,
|
|
vmcs12->vm_exit_msr_store_addr,
|
|
vmcs12->vm_exit_msr_store_count))
|
|
nested_vmx_abort(vcpu,
|
|
VMX_ABORT_SAVE_GUEST_MSR_FAIL);
|
|
}
|
|
}
|
|
|
|
/*
|
|
* A part of what we need to when the nested L2 guest exits and we want to
|
|
* run its L1 parent, is to reset L1's guest state to the host state specified
|
|
* in vmcs12.
|
|
* This function is to be called not only on normal nested exit, but also on
|
|
* a nested entry failure, as explained in Intel's spec, 3B.23.7 ("VM-Entry
|
|
* Failures During or After Loading Guest State").
|
|
* This function should be called when the active VMCS is L1's (vmcs01).
|
|
*/
|
|
static void load_vmcs12_host_state(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
enum vm_entry_failure_code ignored;
|
|
struct kvm_segment seg;
|
|
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_EFER)
|
|
vcpu->arch.efer = vmcs12->host_ia32_efer;
|
|
else if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
|
|
vcpu->arch.efer |= (EFER_LMA | EFER_LME);
|
|
else
|
|
vcpu->arch.efer &= ~(EFER_LMA | EFER_LME);
|
|
vmx_set_efer(vcpu, vcpu->arch.efer);
|
|
|
|
kvm_rsp_write(vcpu, vmcs12->host_rsp);
|
|
kvm_rip_write(vcpu, vmcs12->host_rip);
|
|
vmx_set_rflags(vcpu, X86_EFLAGS_FIXED);
|
|
vmx_set_interrupt_shadow(vcpu, 0);
|
|
|
|
/*
|
|
* Note that calling vmx_set_cr0 is important, even if cr0 hasn't
|
|
* actually changed, because vmx_set_cr0 refers to efer set above.
|
|
*
|
|
* CR0_GUEST_HOST_MASK is already set in the original vmcs01
|
|
* (KVM doesn't change it);
|
|
*/
|
|
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
|
|
vmx_set_cr0(vcpu, vmcs12->host_cr0);
|
|
|
|
/* Same as above - no reason to call set_cr4_guest_host_mask(). */
|
|
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
|
|
vmx_set_cr4(vcpu, vmcs12->host_cr4);
|
|
|
|
nested_ept_uninit_mmu_context(vcpu);
|
|
|
|
/*
|
|
* Only PDPTE load can fail as the value of cr3 was checked on entry and
|
|
* couldn't have changed.
|
|
*/
|
|
if (nested_vmx_load_cr3(vcpu, vmcs12->host_cr3, false, true, &ignored))
|
|
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_PDPTE_FAIL);
|
|
|
|
nested_vmx_transition_tlb_flush(vcpu, vmcs12, false);
|
|
|
|
vmcs_write32(GUEST_SYSENTER_CS, vmcs12->host_ia32_sysenter_cs);
|
|
vmcs_writel(GUEST_SYSENTER_ESP, vmcs12->host_ia32_sysenter_esp);
|
|
vmcs_writel(GUEST_SYSENTER_EIP, vmcs12->host_ia32_sysenter_eip);
|
|
vmcs_writel(GUEST_IDTR_BASE, vmcs12->host_idtr_base);
|
|
vmcs_writel(GUEST_GDTR_BASE, vmcs12->host_gdtr_base);
|
|
vmcs_write32(GUEST_IDTR_LIMIT, 0xFFFF);
|
|
vmcs_write32(GUEST_GDTR_LIMIT, 0xFFFF);
|
|
|
|
/* If not VM_EXIT_CLEAR_BNDCFGS, the L2 value propagates to L1. */
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_CLEAR_BNDCFGS)
|
|
vmcs_write64(GUEST_BNDCFGS, 0);
|
|
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PAT) {
|
|
vmcs_write64(GUEST_IA32_PAT, vmcs12->host_ia32_pat);
|
|
vcpu->arch.pat = vmcs12->host_ia32_pat;
|
|
}
|
|
if ((vmcs12->vm_exit_controls & VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL) &&
|
|
intel_pmu_has_perf_global_ctrl(vcpu_to_pmu(vcpu)))
|
|
WARN_ON_ONCE(kvm_set_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL,
|
|
vmcs12->host_ia32_perf_global_ctrl));
|
|
|
|
/* Set L1 segment info according to Intel SDM
|
|
27.5.2 Loading Host Segment and Descriptor-Table Registers */
|
|
seg = (struct kvm_segment) {
|
|
.base = 0,
|
|
.limit = 0xFFFFFFFF,
|
|
.selector = vmcs12->host_cs_selector,
|
|
.type = 11,
|
|
.present = 1,
|
|
.s = 1,
|
|
.g = 1
|
|
};
|
|
if (vmcs12->vm_exit_controls & VM_EXIT_HOST_ADDR_SPACE_SIZE)
|
|
seg.l = 1;
|
|
else
|
|
seg.db = 1;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_CS);
|
|
seg = (struct kvm_segment) {
|
|
.base = 0,
|
|
.limit = 0xFFFFFFFF,
|
|
.type = 3,
|
|
.present = 1,
|
|
.s = 1,
|
|
.db = 1,
|
|
.g = 1
|
|
};
|
|
seg.selector = vmcs12->host_ds_selector;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_DS);
|
|
seg.selector = vmcs12->host_es_selector;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_ES);
|
|
seg.selector = vmcs12->host_ss_selector;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_SS);
|
|
seg.selector = vmcs12->host_fs_selector;
|
|
seg.base = vmcs12->host_fs_base;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_FS);
|
|
seg.selector = vmcs12->host_gs_selector;
|
|
seg.base = vmcs12->host_gs_base;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_GS);
|
|
seg = (struct kvm_segment) {
|
|
.base = vmcs12->host_tr_base,
|
|
.limit = 0x67,
|
|
.selector = vmcs12->host_tr_selector,
|
|
.type = 11,
|
|
.present = 1
|
|
};
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_TR);
|
|
|
|
memset(&seg, 0, sizeof(seg));
|
|
seg.unusable = 1;
|
|
__vmx_set_segment(vcpu, &seg, VCPU_SREG_LDTR);
|
|
|
|
kvm_set_dr(vcpu, 7, 0x400);
|
|
vmcs_write64(GUEST_IA32_DEBUGCTL, 0);
|
|
|
|
if (nested_vmx_load_msr(vcpu, vmcs12->vm_exit_msr_load_addr,
|
|
vmcs12->vm_exit_msr_load_count))
|
|
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
|
|
|
|
to_vmx(vcpu)->emulation_required = vmx_emulation_required(vcpu);
|
|
}
|
|
|
|
static inline u64 nested_vmx_get_vmcs01_guest_efer(struct vcpu_vmx *vmx)
|
|
{
|
|
struct vmx_uret_msr *efer_msr;
|
|
unsigned int i;
|
|
|
|
if (vm_entry_controls_get(vmx) & VM_ENTRY_LOAD_IA32_EFER)
|
|
return vmcs_read64(GUEST_IA32_EFER);
|
|
|
|
if (cpu_has_load_ia32_efer())
|
|
return host_efer;
|
|
|
|
for (i = 0; i < vmx->msr_autoload.guest.nr; ++i) {
|
|
if (vmx->msr_autoload.guest.val[i].index == MSR_EFER)
|
|
return vmx->msr_autoload.guest.val[i].value;
|
|
}
|
|
|
|
efer_msr = vmx_find_uret_msr(vmx, MSR_EFER);
|
|
if (efer_msr)
|
|
return efer_msr->data;
|
|
|
|
return host_efer;
|
|
}
|
|
|
|
static void nested_vmx_restore_host_state(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmx_msr_entry g, h;
|
|
gpa_t gpa;
|
|
u32 i, j;
|
|
|
|
vcpu->arch.pat = vmcs_read64(GUEST_IA32_PAT);
|
|
|
|
if (vmcs12->vm_entry_controls & VM_ENTRY_LOAD_DEBUG_CONTROLS) {
|
|
/*
|
|
* L1's host DR7 is lost if KVM_GUESTDBG_USE_HW_BP is set
|
|
* as vmcs01.GUEST_DR7 contains a userspace defined value
|
|
* and vcpu->arch.dr7 is not squirreled away before the
|
|
* nested VMENTER (not worth adding a variable in nested_vmx).
|
|
*/
|
|
if (vcpu->guest_debug & KVM_GUESTDBG_USE_HW_BP)
|
|
kvm_set_dr(vcpu, 7, DR7_FIXED_1);
|
|
else
|
|
WARN_ON(kvm_set_dr(vcpu, 7, vmcs_readl(GUEST_DR7)));
|
|
}
|
|
|
|
/*
|
|
* Note that calling vmx_set_{efer,cr0,cr4} is important as they
|
|
* handle a variety of side effects to KVM's software model.
|
|
*/
|
|
vmx_set_efer(vcpu, nested_vmx_get_vmcs01_guest_efer(vmx));
|
|
|
|
vcpu->arch.cr0_guest_owned_bits = KVM_POSSIBLE_CR0_GUEST_BITS;
|
|
vmx_set_cr0(vcpu, vmcs_readl(CR0_READ_SHADOW));
|
|
|
|
vcpu->arch.cr4_guest_owned_bits = ~vmcs_readl(CR4_GUEST_HOST_MASK);
|
|
vmx_set_cr4(vcpu, vmcs_readl(CR4_READ_SHADOW));
|
|
|
|
nested_ept_uninit_mmu_context(vcpu);
|
|
vcpu->arch.cr3 = vmcs_readl(GUEST_CR3);
|
|
kvm_register_mark_available(vcpu, VCPU_EXREG_CR3);
|
|
|
|
/*
|
|
* Use ept_save_pdptrs(vcpu) to load the MMU's cached PDPTRs
|
|
* from vmcs01 (if necessary). The PDPTRs are not loaded on
|
|
* VMFail, like everything else we just need to ensure our
|
|
* software model is up-to-date.
|
|
*/
|
|
if (enable_ept && is_pae_paging(vcpu))
|
|
ept_save_pdptrs(vcpu);
|
|
|
|
kvm_mmu_reset_context(vcpu);
|
|
|
|
/*
|
|
* This nasty bit of open coding is a compromise between blindly
|
|
* loading L1's MSRs using the exit load lists (incorrect emulation
|
|
* of VMFail), leaving the nested VM's MSRs in the software model
|
|
* (incorrect behavior) and snapshotting the modified MSRs (too
|
|
* expensive since the lists are unbound by hardware). For each
|
|
* MSR that was (prematurely) loaded from the nested VMEntry load
|
|
* list, reload it from the exit load list if it exists and differs
|
|
* from the guest value. The intent is to stuff host state as
|
|
* silently as possible, not to fully process the exit load list.
|
|
*/
|
|
for (i = 0; i < vmcs12->vm_entry_msr_load_count; i++) {
|
|
gpa = vmcs12->vm_entry_msr_load_addr + (i * sizeof(g));
|
|
if (kvm_vcpu_read_guest(vcpu, gpa, &g, sizeof(g))) {
|
|
pr_debug_ratelimited(
|
|
"%s read MSR index failed (%u, 0x%08llx)\n",
|
|
__func__, i, gpa);
|
|
goto vmabort;
|
|
}
|
|
|
|
for (j = 0; j < vmcs12->vm_exit_msr_load_count; j++) {
|
|
gpa = vmcs12->vm_exit_msr_load_addr + (j * sizeof(h));
|
|
if (kvm_vcpu_read_guest(vcpu, gpa, &h, sizeof(h))) {
|
|
pr_debug_ratelimited(
|
|
"%s read MSR failed (%u, 0x%08llx)\n",
|
|
__func__, j, gpa);
|
|
goto vmabort;
|
|
}
|
|
if (h.index != g.index)
|
|
continue;
|
|
if (h.value == g.value)
|
|
break;
|
|
|
|
if (nested_vmx_load_msr_check(vcpu, &h)) {
|
|
pr_debug_ratelimited(
|
|
"%s check failed (%u, 0x%x, 0x%x)\n",
|
|
__func__, j, h.index, h.reserved);
|
|
goto vmabort;
|
|
}
|
|
|
|
if (kvm_set_msr(vcpu, h.index, h.value)) {
|
|
pr_debug_ratelimited(
|
|
"%s WRMSR failed (%u, 0x%x, 0x%llx)\n",
|
|
__func__, j, h.index, h.value);
|
|
goto vmabort;
|
|
}
|
|
}
|
|
}
|
|
|
|
return;
|
|
|
|
vmabort:
|
|
nested_vmx_abort(vcpu, VMX_ABORT_LOAD_HOST_MSR_FAIL);
|
|
}
|
|
|
|
/*
|
|
* Emulate an exit from nested guest (L2) to L1, i.e., prepare to run L1
|
|
* and modify vmcs12 to make it see what it would expect to see there if
|
|
* L2 was its real guest. Must only be called when in L2 (is_guest_mode())
|
|
*/
|
|
void nested_vmx_vmexit(struct kvm_vcpu *vcpu, u32 vm_exit_reason,
|
|
u32 exit_intr_info, unsigned long exit_qualification)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
|
/* trying to cancel vmlaunch/vmresume is a bug */
|
|
WARN_ON_ONCE(vmx->nested.nested_run_pending);
|
|
|
|
if (kvm_check_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu)) {
|
|
/*
|
|
* KVM_REQ_GET_NESTED_STATE_PAGES is also used to map
|
|
* Enlightened VMCS after migration and we still need to
|
|
* do that when something is forcing L2->L1 exit prior to
|
|
* the first L2 run.
|
|
*/
|
|
(void)nested_get_evmcs_page(vcpu);
|
|
}
|
|
|
|
/* Service pending TLB flush requests for L2 before switching to L1. */
|
|
kvm_service_local_tlb_flush_requests(vcpu);
|
|
|
|
/*
|
|
* VCPU_EXREG_PDPTR will be clobbered in arch/x86/kvm/vmx/vmx.h between
|
|
* now and the new vmentry. Ensure that the VMCS02 PDPTR fields are
|
|
* up-to-date before switching to L1.
|
|
*/
|
|
if (enable_ept && is_pae_paging(vcpu))
|
|
vmx_ept_load_pdptrs(vcpu);
|
|
|
|
leave_guest_mode(vcpu);
|
|
|
|
if (nested_cpu_has_preemption_timer(vmcs12))
|
|
hrtimer_cancel(&to_vmx(vcpu)->nested.preemption_timer);
|
|
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_USE_TSC_OFFSETTING)) {
|
|
vcpu->arch.tsc_offset = vcpu->arch.l1_tsc_offset;
|
|
if (nested_cpu_has2(vmcs12, SECONDARY_EXEC_TSC_SCALING))
|
|
vcpu->arch.tsc_scaling_ratio = vcpu->arch.l1_tsc_scaling_ratio;
|
|
}
|
|
|
|
if (likely(!vmx->fail)) {
|
|
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
|
|
|
|
if (vm_exit_reason != -1)
|
|
prepare_vmcs12(vcpu, vmcs12, vm_exit_reason,
|
|
exit_intr_info, exit_qualification);
|
|
|
|
/*
|
|
* Must happen outside of sync_vmcs02_to_vmcs12() as it will
|
|
* also be used to capture vmcs12 cache as part of
|
|
* capturing nVMX state for snapshot (migration).
|
|
*
|
|
* Otherwise, this flush will dirty guest memory at a
|
|
* point it is already assumed by user-space to be
|
|
* immutable.
|
|
*/
|
|
nested_flush_cached_shadow_vmcs12(vcpu, vmcs12);
|
|
} else {
|
|
/*
|
|
* The only expected VM-instruction error is "VM entry with
|
|
* invalid control field(s)." Anything else indicates a
|
|
* problem with L0. And we should never get here with a
|
|
* VMFail of any type if early consistency checks are enabled.
|
|
*/
|
|
WARN_ON_ONCE(vmcs_read32(VM_INSTRUCTION_ERROR) !=
|
|
VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
|
WARN_ON_ONCE(nested_early_check);
|
|
}
|
|
|
|
/*
|
|
* Drop events/exceptions that were queued for re-injection to L2
|
|
* (picked up via vmx_complete_interrupts()), as well as exceptions
|
|
* that were pending for L2. Note, this must NOT be hoisted above
|
|
* prepare_vmcs12(), events/exceptions queued for re-injection need to
|
|
* be captured in vmcs12 (see vmcs12_save_pending_event()).
|
|
*/
|
|
vcpu->arch.nmi_injected = false;
|
|
kvm_clear_exception_queue(vcpu);
|
|
kvm_clear_interrupt_queue(vcpu);
|
|
|
|
vmx_switch_vmcs(vcpu, &vmx->vmcs01);
|
|
|
|
/*
|
|
* If IBRS is advertised to the vCPU, KVM must flush the indirect
|
|
* branch predictors when transitioning from L2 to L1, as L1 expects
|
|
* hardware (KVM in this case) to provide separate predictor modes.
|
|
* Bare metal isolates VMX root (host) from VMX non-root (guest), but
|
|
* doesn't isolate different VMCSs, i.e. in this case, doesn't provide
|
|
* separate modes for L2 vs L1.
|
|
*/
|
|
if (guest_cpuid_has(vcpu, X86_FEATURE_SPEC_CTRL))
|
|
indirect_branch_prediction_barrier();
|
|
|
|
/* Update any VMCS fields that might have changed while L2 ran */
|
|
vmcs_write32(VM_EXIT_MSR_LOAD_COUNT, vmx->msr_autoload.host.nr);
|
|
vmcs_write32(VM_ENTRY_MSR_LOAD_COUNT, vmx->msr_autoload.guest.nr);
|
|
vmcs_write64(TSC_OFFSET, vcpu->arch.tsc_offset);
|
|
if (kvm_has_tsc_control)
|
|
vmcs_write64(TSC_MULTIPLIER, vcpu->arch.tsc_scaling_ratio);
|
|
|
|
if (vmx->nested.l1_tpr_threshold != -1)
|
|
vmcs_write32(TPR_THRESHOLD, vmx->nested.l1_tpr_threshold);
|
|
|
|
if (vmx->nested.change_vmcs01_virtual_apic_mode) {
|
|
vmx->nested.change_vmcs01_virtual_apic_mode = false;
|
|
vmx_set_virtual_apic_mode(vcpu);
|
|
}
|
|
|
|
if (vmx->nested.update_vmcs01_cpu_dirty_logging) {
|
|
vmx->nested.update_vmcs01_cpu_dirty_logging = false;
|
|
vmx_update_cpu_dirty_logging(vcpu);
|
|
}
|
|
|
|
/* Unpin physical memory we referred to in vmcs02 */
|
|
if (vmx->nested.apic_access_page) {
|
|
kvm_release_page_clean(vmx->nested.apic_access_page);
|
|
vmx->nested.apic_access_page = NULL;
|
|
}
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.virtual_apic_map, true);
|
|
kvm_vcpu_unmap(vcpu, &vmx->nested.pi_desc_map, true);
|
|
vmx->nested.pi_desc = NULL;
|
|
|
|
if (vmx->nested.reload_vmcs01_apic_access_page) {
|
|
vmx->nested.reload_vmcs01_apic_access_page = false;
|
|
kvm_make_request(KVM_REQ_APIC_PAGE_RELOAD, vcpu);
|
|
}
|
|
|
|
if (vmx->nested.update_vmcs01_apicv_status) {
|
|
vmx->nested.update_vmcs01_apicv_status = false;
|
|
kvm_make_request(KVM_REQ_APICV_UPDATE, vcpu);
|
|
}
|
|
|
|
if ((vm_exit_reason != -1) &&
|
|
(enable_shadow_vmcs || evmptr_is_valid(vmx->nested.hv_evmcs_vmptr)))
|
|
vmx->nested.need_vmcs12_to_shadow_sync = true;
|
|
|
|
/* in case we halted in L2 */
|
|
vcpu->arch.mp_state = KVM_MP_STATE_RUNNABLE;
|
|
|
|
if (likely(!vmx->fail)) {
|
|
if ((u16)vm_exit_reason == EXIT_REASON_EXTERNAL_INTERRUPT &&
|
|
nested_exit_intr_ack_set(vcpu)) {
|
|
int irq = kvm_cpu_get_interrupt(vcpu);
|
|
WARN_ON(irq < 0);
|
|
vmcs12->vm_exit_intr_info = irq |
|
|
INTR_INFO_VALID_MASK | INTR_TYPE_EXT_INTR;
|
|
}
|
|
|
|
if (vm_exit_reason != -1)
|
|
trace_kvm_nested_vmexit_inject(vmcs12->vm_exit_reason,
|
|
vmcs12->exit_qualification,
|
|
vmcs12->idt_vectoring_info_field,
|
|
vmcs12->vm_exit_intr_info,
|
|
vmcs12->vm_exit_intr_error_code,
|
|
KVM_ISA_VMX);
|
|
|
|
load_vmcs12_host_state(vcpu, vmcs12);
|
|
|
|
return;
|
|
}
|
|
|
|
/*
|
|
* After an early L2 VM-entry failure, we're now back
|
|
* in L1 which thinks it just finished a VMLAUNCH or
|
|
* VMRESUME instruction, so we need to set the failure
|
|
* flag and the VM-instruction error field of the VMCS
|
|
* accordingly, and skip the emulated instruction.
|
|
*/
|
|
(void)nested_vmx_fail(vcpu, VMXERR_ENTRY_INVALID_CONTROL_FIELD);
|
|
|
|
/*
|
|
* Restore L1's host state to KVM's software model. We're here
|
|
* because a consistency check was caught by hardware, which
|
|
* means some amount of guest state has been propagated to KVM's
|
|
* model and needs to be unwound to the host's state.
|
|
*/
|
|
nested_vmx_restore_host_state(vcpu);
|
|
|
|
vmx->fail = 0;
|
|
}
|
|
|
|
static void nested_vmx_triple_fault(struct kvm_vcpu *vcpu)
|
|
{
|
|
nested_vmx_vmexit(vcpu, EXIT_REASON_TRIPLE_FAULT, 0, 0);
|
|
}
|
|
|
|
/*
|
|
* Decode the memory-address operand of a vmx instruction, as recorded on an
|
|
* exit caused by such an instruction (run by a guest hypervisor).
|
|
* On success, returns 0. When the operand is invalid, returns 1 and throws
|
|
* #UD, #GP, or #SS.
|
|
*/
|
|
int get_vmx_mem_address(struct kvm_vcpu *vcpu, unsigned long exit_qualification,
|
|
u32 vmx_instruction_info, bool wr, int len, gva_t *ret)
|
|
{
|
|
gva_t off;
|
|
bool exn;
|
|
struct kvm_segment s;
|
|
|
|
/*
|
|
* According to Vol. 3B, "Information for VM Exits Due to Instruction
|
|
* Execution", on an exit, vmx_instruction_info holds most of the
|
|
* addressing components of the operand. Only the displacement part
|
|
* is put in exit_qualification (see 3B, "Basic VM-Exit Information").
|
|
* For how an actual address is calculated from all these components,
|
|
* refer to Vol. 1, "Operand Addressing".
|
|
*/
|
|
int scaling = vmx_instruction_info & 3;
|
|
int addr_size = (vmx_instruction_info >> 7) & 7;
|
|
bool is_reg = vmx_instruction_info & (1u << 10);
|
|
int seg_reg = (vmx_instruction_info >> 15) & 7;
|
|
int index_reg = (vmx_instruction_info >> 18) & 0xf;
|
|
bool index_is_valid = !(vmx_instruction_info & (1u << 22));
|
|
int base_reg = (vmx_instruction_info >> 23) & 0xf;
|
|
bool base_is_valid = !(vmx_instruction_info & (1u << 27));
|
|
|
|
if (is_reg) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
/* Addr = segment_base + offset */
|
|
/* offset = base + [index * scale] + displacement */
|
|
off = exit_qualification; /* holds the displacement */
|
|
if (addr_size == 1)
|
|
off = (gva_t)sign_extend64(off, 31);
|
|
else if (addr_size == 0)
|
|
off = (gva_t)sign_extend64(off, 15);
|
|
if (base_is_valid)
|
|
off += kvm_register_read(vcpu, base_reg);
|
|
if (index_is_valid)
|
|
off += kvm_register_read(vcpu, index_reg) << scaling;
|
|
vmx_get_segment(vcpu, &s, seg_reg);
|
|
|
|
/*
|
|
* The effective address, i.e. @off, of a memory operand is truncated
|
|
* based on the address size of the instruction. Note that this is
|
|
* the *effective address*, i.e. the address prior to accounting for
|
|
* the segment's base.
|
|
*/
|
|
if (addr_size == 1) /* 32 bit */
|
|
off &= 0xffffffff;
|
|
else if (addr_size == 0) /* 16 bit */
|
|
off &= 0xffff;
|
|
|
|
/* Checks for #GP/#SS exceptions. */
|
|
exn = false;
|
|
if (is_long_mode(vcpu)) {
|
|
/*
|
|
* The virtual/linear address is never truncated in 64-bit
|
|
* mode, e.g. a 32-bit address size can yield a 64-bit virtual
|
|
* address when using FS/GS with a non-zero base.
|
|
*/
|
|
if (seg_reg == VCPU_SREG_FS || seg_reg == VCPU_SREG_GS)
|
|
*ret = s.base + off;
|
|
else
|
|
*ret = off;
|
|
|
|
/* Long mode: #GP(0)/#SS(0) if the memory address is in a
|
|
* non-canonical form. This is the only check on the memory
|
|
* destination for long mode!
|
|
*/
|
|
exn = is_noncanonical_address(*ret, vcpu);
|
|
} else {
|
|
/*
|
|
* When not in long mode, the virtual/linear address is
|
|
* unconditionally truncated to 32 bits regardless of the
|
|
* address size.
|
|
*/
|
|
*ret = (s.base + off) & 0xffffffff;
|
|
|
|
/* Protected mode: apply checks for segment validity in the
|
|
* following order:
|
|
* - segment type check (#GP(0) may be thrown)
|
|
* - usability check (#GP(0)/#SS(0))
|
|
* - limit check (#GP(0)/#SS(0))
|
|
*/
|
|
if (wr)
|
|
/* #GP(0) if the destination operand is located in a
|
|
* read-only data segment or any code segment.
|
|
*/
|
|
exn = ((s.type & 0xa) == 0 || (s.type & 8));
|
|
else
|
|
/* #GP(0) if the source operand is located in an
|
|
* execute-only code segment
|
|
*/
|
|
exn = ((s.type & 0xa) == 8);
|
|
if (exn) {
|
|
kvm_queue_exception_e(vcpu, GP_VECTOR, 0);
|
|
return 1;
|
|
}
|
|
/* Protected mode: #GP(0)/#SS(0) if the segment is unusable.
|
|
*/
|
|
exn = (s.unusable != 0);
|
|
|
|
/*
|
|
* Protected mode: #GP(0)/#SS(0) if the memory operand is
|
|
* outside the segment limit. All CPUs that support VMX ignore
|
|
* limit checks for flat segments, i.e. segments with base==0,
|
|
* limit==0xffffffff and of type expand-up data or code.
|
|
*/
|
|
if (!(s.base == 0 && s.limit == 0xffffffff &&
|
|
((s.type & 8) || !(s.type & 4))))
|
|
exn = exn || ((u64)off + len - 1 > s.limit);
|
|
}
|
|
if (exn) {
|
|
kvm_queue_exception_e(vcpu,
|
|
seg_reg == VCPU_SREG_SS ?
|
|
SS_VECTOR : GP_VECTOR,
|
|
0);
|
|
return 1;
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
void nested_vmx_pmu_entry_exit_ctls_update(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx;
|
|
|
|
if (!nested_vmx_allowed(vcpu))
|
|
return;
|
|
|
|
vmx = to_vmx(vcpu);
|
|
if (kvm_x86_ops.pmu_ops->is_valid_msr(vcpu, MSR_CORE_PERF_GLOBAL_CTRL)) {
|
|
vmx->nested.msrs.entry_ctls_high |=
|
|
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
vmx->nested.msrs.exit_ctls_high |=
|
|
VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
} else {
|
|
vmx->nested.msrs.entry_ctls_high &=
|
|
~VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
vmx->nested.msrs.exit_ctls_high &=
|
|
~VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
}
|
|
}
|
|
|
|
/*
 * Fetch the VMCS pointer that is the memory operand of the current VMX
 * instruction and store it in @vmpointer.
 *
 * Returns 0 on success.  On failure returns -EINVAL and sets @ret to the value
 * the calling exit handler should return: 1 if decoding the operand address
 * failed, or the result of kvm_handle_memory_failure() if reading guest
 * memory failed.
 */
static int nested_vmx_get_vmptr(struct kvm_vcpu *vcpu, gpa_t *vmpointer,
				int *ret)
{
	unsigned long exit_qual = vmx_get_exit_qual(vcpu);
	u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	struct x86_exception fault;
	gva_t operand_gva;
	int rc;

	if (get_vmx_mem_address(vcpu, exit_qual, instr_info, false,
				sizeof(*vmpointer), &operand_gva)) {
		*ret = 1;
		return -EINVAL;
	}

	rc = kvm_read_guest_virt(vcpu, operand_gva, vmpointer,
				 sizeof(*vmpointer), &fault);
	if (rc == X86EMUL_CONTINUE)
		return 0;

	*ret = kvm_handle_memory_failure(vcpu, rc, &fault);
	return -EINVAL;
}
|
|
|
|
/*
|
|
* Allocate a shadow VMCS and associate it with the currently loaded
|
|
* VMCS, unless such a shadow VMCS already exists. The newly allocated
|
|
* VMCS is also VMCLEARed, so that it is ready for use.
|
|
*/
|
|
static struct vmcs *alloc_shadow_vmcs(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct loaded_vmcs *loaded_vmcs = vmx->loaded_vmcs;
|
|
|
|
/*
|
|
* We should allocate a shadow vmcs for vmcs01 only when L1
|
|
* executes VMXON and free it when L1 executes VMXOFF.
|
|
* As it is invalid to execute VMXON twice, we shouldn't reach
|
|
* here when vmcs01 already have an allocated shadow vmcs.
|
|
*/
|
|
WARN_ON(loaded_vmcs == &vmx->vmcs01 && loaded_vmcs->shadow_vmcs);
|
|
|
|
if (!loaded_vmcs->shadow_vmcs) {
|
|
loaded_vmcs->shadow_vmcs = alloc_vmcs(true);
|
|
if (loaded_vmcs->shadow_vmcs)
|
|
vmcs_clear(loaded_vmcs->shadow_vmcs);
|
|
}
|
|
return loaded_vmcs->shadow_vmcs;
|
|
}
|
|
|
|
static int enter_vmx_operation(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
int r;
|
|
|
|
r = alloc_loaded_vmcs(&vmx->nested.vmcs02);
|
|
if (r < 0)
|
|
goto out_vmcs02;
|
|
|
|
vmx->nested.cached_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
|
|
if (!vmx->nested.cached_vmcs12)
|
|
goto out_cached_vmcs12;
|
|
|
|
vmx->nested.cached_shadow_vmcs12 = kzalloc(VMCS12_SIZE, GFP_KERNEL_ACCOUNT);
|
|
if (!vmx->nested.cached_shadow_vmcs12)
|
|
goto out_cached_shadow_vmcs12;
|
|
|
|
if (enable_shadow_vmcs && !alloc_shadow_vmcs(vcpu))
|
|
goto out_shadow_vmcs;
|
|
|
|
hrtimer_init(&vmx->nested.preemption_timer, CLOCK_MONOTONIC,
|
|
HRTIMER_MODE_ABS_PINNED);
|
|
vmx->nested.preemption_timer.function = vmx_preemption_timer_fn;
|
|
|
|
vmx->nested.vpid02 = allocate_vpid();
|
|
|
|
vmx->nested.vmcs02_initialized = false;
|
|
vmx->nested.vmxon = true;
|
|
|
|
if (vmx_pt_mode_is_host_guest()) {
|
|
vmx->pt_desc.guest.ctl = 0;
|
|
pt_update_intercept_for_msr(vcpu);
|
|
}
|
|
|
|
return 0;
|
|
|
|
out_shadow_vmcs:
|
|
kfree(vmx->nested.cached_shadow_vmcs12);
|
|
|
|
out_cached_shadow_vmcs12:
|
|
kfree(vmx->nested.cached_vmcs12);
|
|
|
|
out_cached_vmcs12:
|
|
free_loaded_vmcs(&vmx->nested.vmcs02);
|
|
|
|
out_vmcs02:
|
|
return -ENOMEM;
|
|
}
|
|
|
|
/* Emulate the VMXON instruction. */
|
|
static int handle_vmon(struct kvm_vcpu *vcpu)
|
|
{
|
|
int ret;
|
|
gpa_t vmptr;
|
|
uint32_t revision;
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
const u64 VMXON_NEEDED_FEATURES = FEAT_CTL_LOCKED
|
|
| FEAT_CTL_VMX_ENABLED_OUTSIDE_SMX;
|
|
|
|
/*
|
|
* Manually check CR4.VMXE checks, KVM must force CR4.VMXE=1 to enter
|
|
* the guest and so cannot rely on hardware to perform the check,
|
|
* which has higher priority than VM-Exit (see Intel SDM's pseudocode
|
|
* for VMXON).
|
|
*
|
|
* Rely on hardware for the other pre-VM-Exit checks, CR0.PE=1, !VM86
|
|
* and !COMPATIBILITY modes. For an unrestricted guest, KVM doesn't
|
|
* force any of the relevant guest state. For a restricted guest, KVM
|
|
* does force CR0.PE=1, but only to also force VM86 in order to emulate
|
|
* Real Mode, and so there's no need to check CR0.PE manually.
|
|
*/
|
|
if (!kvm_read_cr4_bits(vcpu, X86_CR4_VMXE)) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* The CPL is checked for "not in VMX operation" and for "in VMX root",
|
|
* and has higher priority than the VM-Fail due to being post-VMXON,
|
|
* i.e. VMXON #GPs outside of VMX non-root if CPL!=0. In VMX non-root,
|
|
* VMXON causes VM-Exit and KVM unconditionally forwards VMXON VM-Exits
|
|
* from L2 to L1, i.e. there's no need to check for the vCPU being in
|
|
* VMX non-root.
|
|
*
|
|
* Forwarding the VM-Exit unconditionally, i.e. without performing the
|
|
* #UD checks (see above), is functionally ok because KVM doesn't allow
|
|
* L1 to run L2 without CR4.VMXE=0, and because KVM never modifies L2's
|
|
* CR0 or CR4, i.e. it's L2's responsibility to emulate #UDs that are
|
|
* missed by hardware due to shadowing CR0 and/or CR4.
|
|
*/
|
|
if (vmx_get_cpl(vcpu)) {
|
|
kvm_inject_gp(vcpu, 0);
|
|
return 1;
|
|
}
|
|
|
|
if (vmx->nested.vmxon)
|
|
return nested_vmx_fail(vcpu, VMXERR_VMXON_IN_VMX_ROOT_OPERATION);
|
|
|
|
/*
|
|
* Invalid CR0/CR4 generates #GP. These checks are performed if and
|
|
* only if the vCPU isn't already in VMX operation, i.e. effectively
|
|
* have lower priority than the VM-Fail above.
|
|
*/
|
|
if (!nested_host_cr0_valid(vcpu, kvm_read_cr0(vcpu)) ||
|
|
!nested_host_cr4_valid(vcpu, kvm_read_cr4(vcpu))) {
|
|
kvm_inject_gp(vcpu, 0);
|
|
return 1;
|
|
}
|
|
|
|
if ((vmx->msr_ia32_feature_control & VMXON_NEEDED_FEATURES)
|
|
!= VMXON_NEEDED_FEATURES) {
|
|
kvm_inject_gp(vcpu, 0);
|
|
return 1;
|
|
}
|
|
|
|
if (nested_vmx_get_vmptr(vcpu, &vmptr, &ret))
|
|
return ret;
|
|
|
|
/*
|
|
* SDM 3: 24.11.5
|
|
* The first 4 bytes of VMXON region contain the supported
|
|
* VMCS revision identifier
|
|
*
|
|
* Note - IA32_VMX_BASIC[48] will never be 1 for the nested case;
|
|
* which replaces physical address width with 32
|
|
*/
|
|
if (!page_address_valid(vcpu, vmptr))
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
if (kvm_read_guest(vcpu->kvm, vmptr, &revision, sizeof(revision)) ||
|
|
revision != VMCS12_REVISION)
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
vmx->nested.vmxon_ptr = vmptr;
|
|
ret = enter_vmx_operation(vcpu);
|
|
if (ret)
|
|
return ret;
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
static inline void nested_release_vmcs12(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
|
|
if (vmx->nested.current_vmptr == -1ull)
|
|
return;
|
|
|
|
copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
|
|
|
|
if (enable_shadow_vmcs) {
|
|
/* copy to memory all shadowed fields in case
|
|
they were modified */
|
|
copy_shadow_to_vmcs12(vmx);
|
|
vmx_disable_shadow_vmcs(vmx);
|
|
}
|
|
vmx->nested.posted_intr_nv = -1;
|
|
|
|
/* Flush VMCS12 to guest memory */
|
|
kvm_vcpu_write_guest_page(vcpu,
|
|
vmx->nested.current_vmptr >> PAGE_SHIFT,
|
|
vmx->nested.cached_vmcs12, 0, VMCS12_SIZE);
|
|
|
|
kvm_mmu_free_roots(vcpu, &vcpu->arch.guest_mmu, KVM_MMU_ROOTS_ALL);
|
|
|
|
vmx->nested.current_vmptr = -1ull;
|
|
}
|
|
|
|
/* Emulate the VMXOFF instruction */
|
|
static int handle_vmoff(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
free_nested(vcpu);
|
|
|
|
/* Process a latched INIT during time CPU was in VMX operation */
|
|
kvm_make_request(KVM_REQ_EVENT, vcpu);
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
/* Emulate the VMCLEAR instruction */
|
|
static int handle_vmclear(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u32 zero = 0;
|
|
gpa_t vmptr;
|
|
u64 evmcs_gpa;
|
|
int r;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
|
|
return r;
|
|
|
|
if (!page_address_valid(vcpu, vmptr))
|
|
return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_INVALID_ADDRESS);
|
|
|
|
if (vmptr == vmx->nested.vmxon_ptr)
|
|
return nested_vmx_fail(vcpu, VMXERR_VMCLEAR_VMXON_POINTER);
|
|
|
|
/*
|
|
* When Enlightened VMEntry is enabled on the calling CPU we treat
|
|
* memory area pointer by vmptr as Enlightened VMCS (as there's no good
|
|
* way to distinguish it from VMCS12) and we must not corrupt it by
|
|
* writing to the non-existent 'launch_state' field. The area doesn't
|
|
* have to be the currently active EVMCS on the calling CPU and there's
|
|
* nothing KVM has to do to transition it from 'active' to 'non-active'
|
|
* state. It is possible that the area will stay mapped as
|
|
* vmx->nested.hv_evmcs but this shouldn't be a problem.
|
|
*/
|
|
if (likely(!vmx->nested.enlightened_vmcs_enabled ||
|
|
!nested_enlightened_vmentry(vcpu, &evmcs_gpa))) {
|
|
if (vmptr == vmx->nested.current_vmptr)
|
|
nested_release_vmcs12(vcpu);
|
|
|
|
kvm_vcpu_write_guest(vcpu,
|
|
vmptr + offsetof(struct vmcs12,
|
|
launch_state),
|
|
&zero, sizeof(zero));
|
|
} else if (vmx->nested.hv_evmcs && vmptr == vmx->nested.hv_evmcs_vmptr) {
|
|
nested_release_evmcs(vcpu);
|
|
}
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
/* Emulate the VMLAUNCH instruction */
|
|
static int handle_vmlaunch(struct kvm_vcpu *vcpu)
|
|
{
|
|
return nested_vmx_run(vcpu, true);
|
|
}
|
|
|
|
/* Emulate the VMRESUME instruction */
|
|
static int handle_vmresume(struct kvm_vcpu *vcpu)
|
|
{
|
|
|
|
return nested_vmx_run(vcpu, false);
|
|
}
|
|
|
|
static int handle_vmread(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
|
|
: get_vmcs12(vcpu);
|
|
unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
|
|
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct x86_exception e;
|
|
unsigned long field;
|
|
u64 value;
|
|
gva_t gva = 0;
|
|
short offset;
|
|
int len, r;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
/*
|
|
* In VMX non-root operation, when the VMCS-link pointer is -1ull,
|
|
* any VMREAD sets the ALU flags for VMfailInvalid.
|
|
*/
|
|
if (vmx->nested.current_vmptr == -1ull ||
|
|
(is_guest_mode(vcpu) &&
|
|
get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
/* Decode instruction info and find the field to read */
|
|
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
|
|
|
|
offset = vmcs_field_to_offset(field);
|
|
if (offset < 0)
|
|
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
|
|
|
if (!is_guest_mode(vcpu) && is_vmcs12_ext_field(field))
|
|
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
|
|
|
/* Read the field, zero-extended to a u64 value */
|
|
value = vmcs12_read_any(vmcs12, field, offset);
|
|
|
|
/*
|
|
* Now copy part of this value to register or memory, as requested.
|
|
* Note that the number of bits actually copied is 32 or 64 depending
|
|
* on the guest's mode (32 or 64 bit), not on the given field's length.
|
|
*/
|
|
if (instr_info & BIT(10)) {
|
|
kvm_register_write(vcpu, (((instr_info) >> 3) & 0xf), value);
|
|
} else {
|
|
len = is_64_bit_mode(vcpu) ? 8 : 4;
|
|
if (get_vmx_mem_address(vcpu, exit_qualification,
|
|
instr_info, true, len, &gva))
|
|
return 1;
|
|
/* _system ok, nested_vmx_check_permission has verified cpl=0 */
|
|
r = kvm_write_guest_virt_system(vcpu, gva, &value, len, &e);
|
|
if (r != X86EMUL_CONTINUE)
|
|
return kvm_handle_memory_failure(vcpu, r, &e);
|
|
}
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
static bool is_shadow_field_rw(unsigned long field)
|
|
{
|
|
switch (field) {
|
|
#define SHADOW_FIELD_RW(x, y) case x:
|
|
#include "vmcs_shadow_fields.h"
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool is_shadow_field_ro(unsigned long field)
|
|
{
|
|
switch (field) {
|
|
#define SHADOW_FIELD_RO(x, y) case x:
|
|
#include "vmcs_shadow_fields.h"
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static int handle_vmwrite(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vmcs12 *vmcs12 = is_guest_mode(vcpu) ? get_shadow_vmcs12(vcpu)
|
|
: get_vmcs12(vcpu);
|
|
unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
|
|
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct x86_exception e;
|
|
unsigned long field;
|
|
short offset;
|
|
gva_t gva;
|
|
int len, r;
|
|
|
|
/*
|
|
* The value to write might be 32 or 64 bits, depending on L1's long
|
|
* mode, and eventually we need to write that into a field of several
|
|
* possible lengths. The code below first zero-extends the value to 64
|
|
* bit (value), and then copies only the appropriate number of
|
|
* bits into the vmcs12 field.
|
|
*/
|
|
u64 value = 0;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
/*
|
|
* In VMX non-root operation, when the VMCS-link pointer is -1ull,
|
|
* any VMWRITE sets the ALU flags for VMfailInvalid.
|
|
*/
|
|
if (vmx->nested.current_vmptr == -1ull ||
|
|
(is_guest_mode(vcpu) &&
|
|
get_vmcs12(vcpu)->vmcs_link_pointer == -1ull))
|
|
return nested_vmx_failInvalid(vcpu);
|
|
|
|
if (instr_info & BIT(10))
|
|
value = kvm_register_read(vcpu, (((instr_info) >> 3) & 0xf));
|
|
else {
|
|
len = is_64_bit_mode(vcpu) ? 8 : 4;
|
|
if (get_vmx_mem_address(vcpu, exit_qualification,
|
|
instr_info, false, len, &gva))
|
|
return 1;
|
|
r = kvm_read_guest_virt(vcpu, gva, &value, len, &e);
|
|
if (r != X86EMUL_CONTINUE)
|
|
return kvm_handle_memory_failure(vcpu, r, &e);
|
|
}
|
|
|
|
field = kvm_register_read(vcpu, (((instr_info) >> 28) & 0xf));
|
|
|
|
offset = vmcs_field_to_offset(field);
|
|
if (offset < 0)
|
|
return nested_vmx_fail(vcpu, VMXERR_UNSUPPORTED_VMCS_COMPONENT);
|
|
|
|
/*
|
|
* If the vCPU supports "VMWRITE to any supported field in the
|
|
* VMCS," then the "read-only" fields are actually read/write.
|
|
*/
|
|
if (vmcs_field_readonly(field) &&
|
|
!nested_cpu_has_vmwrite_any_field(vcpu))
|
|
return nested_vmx_fail(vcpu, VMXERR_VMWRITE_READ_ONLY_VMCS_COMPONENT);
|
|
|
|
/*
|
|
* Ensure vmcs12 is up-to-date before any VMWRITE that dirties
|
|
* vmcs12, else we may crush a field or consume a stale value.
|
|
*/
|
|
if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field))
|
|
copy_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
|
|
|
/*
|
|
* Some Intel CPUs intentionally drop the reserved bits of the AR byte
|
|
* fields on VMWRITE. Emulate this behavior to ensure consistent KVM
|
|
* behavior regardless of the underlying hardware, e.g. if an AR_BYTE
|
|
* field is intercepted for VMWRITE but not VMREAD (in L1), then VMREAD
|
|
* from L1 will return a different value than VMREAD from L2 (L1 sees
|
|
* the stripped down value, L2 sees the full value as stored by KVM).
|
|
*/
|
|
if (field >= GUEST_ES_AR_BYTES && field <= GUEST_TR_AR_BYTES)
|
|
value &= 0x1f0ff;
|
|
|
|
vmcs12_write_any(vmcs12, field, offset, value);
|
|
|
|
/*
|
|
* Do not track vmcs12 dirty-state if in guest-mode as we actually
|
|
* dirty shadow vmcs12 instead of vmcs12. Fields that can be updated
|
|
* by L1 without a vmexit are always updated in the vmcs02, i.e. don't
|
|
* "dirty" vmcs12, all others go down the prepare_vmcs02() slow path.
|
|
*/
|
|
if (!is_guest_mode(vcpu) && !is_shadow_field_rw(field)) {
|
|
/*
|
|
* L1 can read these fields without exiting, ensure the
|
|
* shadow VMCS is up-to-date.
|
|
*/
|
|
if (enable_shadow_vmcs && is_shadow_field_ro(field)) {
|
|
preempt_disable();
|
|
vmcs_load(vmx->vmcs01.shadow_vmcs);
|
|
|
|
__vmcs_writel(field, value);
|
|
|
|
vmcs_clear(vmx->vmcs01.shadow_vmcs);
|
|
vmcs_load(vmx->loaded_vmcs->vmcs);
|
|
preempt_enable();
|
|
}
|
|
vmx->nested.dirty_vmcs12 = true;
|
|
}
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
/*
 * Record @vmptr as the current VMCS pointer and mark vmcs12 dirty.  With
 * shadow VMCS enabled, also turn on the SHADOW_VMCS secondary control, point
 * the VMCS link pointer at vmcs01's shadow VMCS and request a
 * vmcs12-to-shadow sync.
 */
static void set_current_vmptr(struct vcpu_vmx *vmx, gpa_t vmptr)
{
	vmx->nested.current_vmptr = vmptr;

	if (!enable_shadow_vmcs)
		goto mark_dirty;

	secondary_exec_controls_setbit(vmx, SECONDARY_EXEC_SHADOW_VMCS);
	vmcs_write64(VMCS_LINK_POINTER, __pa(vmx->vmcs01.shadow_vmcs));
	vmx->nested.need_vmcs12_to_shadow_sync = true;

mark_dirty:
	vmx->nested.dirty_vmcs12 = true;
}
|
|
|
|
/* Emulate the VMPTRLD instruction */
|
|
static int handle_vmptrld(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
gpa_t vmptr;
|
|
int r;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
if (nested_vmx_get_vmptr(vcpu, &vmptr, &r))
|
|
return r;
|
|
|
|
if (!page_address_valid(vcpu, vmptr))
|
|
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_INVALID_ADDRESS);
|
|
|
|
if (vmptr == vmx->nested.vmxon_ptr)
|
|
return nested_vmx_fail(vcpu, VMXERR_VMPTRLD_VMXON_POINTER);
|
|
|
|
/* Forbid normal VMPTRLD if Enlightened version was used */
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
return 1;
|
|
|
|
if (vmx->nested.current_vmptr != vmptr) {
|
|
struct kvm_host_map map;
|
|
struct vmcs12 *new_vmcs12;
|
|
|
|
if (kvm_vcpu_map(vcpu, gpa_to_gfn(vmptr), &map)) {
|
|
/*
|
|
* Reads from an unbacked page return all 1s,
|
|
* which means that the 32 bits located at the
|
|
* given physical address won't match the required
|
|
* VMCS12_REVISION identifier.
|
|
*/
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
|
|
}
|
|
|
|
new_vmcs12 = map.hva;
|
|
|
|
if (new_vmcs12->hdr.revision_id != VMCS12_REVISION ||
|
|
(new_vmcs12->hdr.shadow_vmcs &&
|
|
!nested_cpu_has_vmx_shadow_vmcs(vcpu))) {
|
|
kvm_vcpu_unmap(vcpu, &map, false);
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_VMPTRLD_INCORRECT_VMCS_REVISION_ID);
|
|
}
|
|
|
|
nested_release_vmcs12(vcpu);
|
|
|
|
/*
|
|
* Load VMCS12 from guest memory since it is not already
|
|
* cached.
|
|
*/
|
|
memcpy(vmx->nested.cached_vmcs12, new_vmcs12, VMCS12_SIZE);
|
|
kvm_vcpu_unmap(vcpu, &map, false);
|
|
|
|
set_current_vmptr(vmx, vmptr);
|
|
}
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
/* Emulate the VMPTRST instruction */
|
|
static int handle_vmptrst(struct kvm_vcpu *vcpu)
|
|
{
|
|
unsigned long exit_qual = vmx_get_exit_qual(vcpu);
|
|
u32 instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
gpa_t current_vmptr = to_vmx(vcpu)->nested.current_vmptr;
|
|
struct x86_exception e;
|
|
gva_t gva;
|
|
int r;
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
if (unlikely(evmptr_is_valid(to_vmx(vcpu)->nested.hv_evmcs_vmptr)))
|
|
return 1;
|
|
|
|
if (get_vmx_mem_address(vcpu, exit_qual, instr_info,
|
|
true, sizeof(gpa_t), &gva))
|
|
return 1;
|
|
/* *_system ok, nested_vmx_check_permission has verified cpl=0 */
|
|
r = kvm_write_guest_virt_system(vcpu, gva, (void *)¤t_vmptr,
|
|
sizeof(gpa_t), &e);
|
|
if (r != X86EMUL_CONTINUE)
|
|
return kvm_handle_memory_failure(vcpu, r, &e);
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
/* Emulate the INVEPT instruction */
|
|
static int handle_invept(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u32 vmx_instruction_info, types;
|
|
unsigned long type, roots_to_free;
|
|
struct kvm_mmu *mmu;
|
|
gva_t gva;
|
|
struct x86_exception e;
|
|
struct {
|
|
u64 eptp, gpa;
|
|
} operand;
|
|
int i, r;
|
|
|
|
if (!(vmx->nested.msrs.secondary_ctls_high &
|
|
SECONDARY_EXEC_ENABLE_EPT) ||
|
|
!(vmx->nested.msrs.ept_caps & VMX_EPT_INVEPT_BIT)) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
|
|
|
|
types = (vmx->nested.msrs.ept_caps >> VMX_EPT_EXTENT_SHIFT) & 6;
|
|
|
|
if (type >= 32 || !(types & (1 << type)))
|
|
return nested_vmx_fail(vcpu, VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
|
|
/* According to the Intel VMX instruction reference, the memory
|
|
* operand is read even if it isn't needed (e.g., for type==global)
|
|
*/
|
|
if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
|
|
vmx_instruction_info, false, sizeof(operand), &gva))
|
|
return 1;
|
|
r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
|
|
if (r != X86EMUL_CONTINUE)
|
|
return kvm_handle_memory_failure(vcpu, r, &e);
|
|
|
|
/*
|
|
* Nested EPT roots are always held through guest_mmu,
|
|
* not root_mmu.
|
|
*/
|
|
mmu = &vcpu->arch.guest_mmu;
|
|
|
|
switch (type) {
|
|
case VMX_EPT_EXTENT_CONTEXT:
|
|
if (!nested_vmx_check_eptp(vcpu, operand.eptp))
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
|
|
roots_to_free = 0;
|
|
if (nested_ept_root_matches(mmu->root_hpa, mmu->root_pgd,
|
|
operand.eptp))
|
|
roots_to_free |= KVM_MMU_ROOT_CURRENT;
|
|
|
|
for (i = 0; i < KVM_MMU_NUM_PREV_ROOTS; i++) {
|
|
if (nested_ept_root_matches(mmu->prev_roots[i].hpa,
|
|
mmu->prev_roots[i].pgd,
|
|
operand.eptp))
|
|
roots_to_free |= KVM_MMU_ROOT_PREVIOUS(i);
|
|
}
|
|
break;
|
|
case VMX_EPT_EXTENT_GLOBAL:
|
|
roots_to_free = KVM_MMU_ROOTS_ALL;
|
|
break;
|
|
default:
|
|
BUG();
|
|
break;
|
|
}
|
|
|
|
if (roots_to_free)
|
|
kvm_mmu_free_roots(vcpu, mmu, roots_to_free);
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
static int handle_invvpid(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
u32 vmx_instruction_info;
|
|
unsigned long type, types;
|
|
gva_t gva;
|
|
struct x86_exception e;
|
|
struct {
|
|
u64 vpid;
|
|
u64 gla;
|
|
} operand;
|
|
u16 vpid02;
|
|
int r;
|
|
|
|
if (!(vmx->nested.msrs.secondary_ctls_high &
|
|
SECONDARY_EXEC_ENABLE_VPID) ||
|
|
!(vmx->nested.msrs.vpid_caps & VMX_VPID_INVVPID_BIT)) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
if (!nested_vmx_check_permission(vcpu))
|
|
return 1;
|
|
|
|
vmx_instruction_info = vmcs_read32(VMX_INSTRUCTION_INFO);
|
|
type = kvm_register_read(vcpu, (vmx_instruction_info >> 28) & 0xf);
|
|
|
|
types = (vmx->nested.msrs.vpid_caps &
|
|
VMX_VPID_EXTENT_SUPPORTED_MASK) >> 8;
|
|
|
|
if (type >= 32 || !(types & (1 << type)))
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
|
|
/* according to the intel vmx instruction reference, the memory
|
|
* operand is read even if it isn't needed (e.g., for type==global)
|
|
*/
|
|
if (get_vmx_mem_address(vcpu, vmx_get_exit_qual(vcpu),
|
|
vmx_instruction_info, false, sizeof(operand), &gva))
|
|
return 1;
|
|
r = kvm_read_guest_virt(vcpu, gva, &operand, sizeof(operand), &e);
|
|
if (r != X86EMUL_CONTINUE)
|
|
return kvm_handle_memory_failure(vcpu, r, &e);
|
|
|
|
if (operand.vpid >> 16)
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
|
|
vpid02 = nested_get_vpid02(vcpu);
|
|
switch (type) {
|
|
case VMX_VPID_EXTENT_INDIVIDUAL_ADDR:
|
|
if (!operand.vpid ||
|
|
is_noncanonical_address(operand.gla, vcpu))
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
vpid_sync_vcpu_addr(vpid02, operand.gla);
|
|
break;
|
|
case VMX_VPID_EXTENT_SINGLE_CONTEXT:
|
|
case VMX_VPID_EXTENT_SINGLE_NON_GLOBAL:
|
|
if (!operand.vpid)
|
|
return nested_vmx_fail(vcpu,
|
|
VMXERR_INVALID_OPERAND_TO_INVEPT_INVVPID);
|
|
vpid_sync_context(vpid02);
|
|
break;
|
|
case VMX_VPID_EXTENT_ALL_CONTEXT:
|
|
vpid_sync_context(vpid02);
|
|
break;
|
|
default:
|
|
WARN_ON_ONCE(1);
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
}
|
|
|
|
/*
|
|
* Sync the shadow page tables if EPT is disabled, L1 is invalidating
|
|
* linear mappings for L2 (tagged with L2's VPID). Free all guest
|
|
* roots as VPIDs are not tracked in the MMU role.
|
|
*
|
|
* Note, this operates on root_mmu, not guest_mmu, as L1 and L2 share
|
|
* an MMU when EPT is disabled.
|
|
*
|
|
* TODO: sync only the affected SPTEs for INVDIVIDUAL_ADDR.
|
|
*/
|
|
if (!enable_ept)
|
|
kvm_mmu_free_guest_mode_roots(vcpu, &vcpu->arch.root_mmu);
|
|
|
|
return nested_vmx_succeed(vcpu);
|
|
}
|
|
|
|
static int nested_vmx_eptp_switching(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
u32 index = kvm_rcx_read(vcpu);
|
|
u64 new_eptp;
|
|
|
|
if (WARN_ON_ONCE(!nested_cpu_has_ept(vmcs12)))
|
|
return 1;
|
|
if (index >= VMFUNC_EPTP_ENTRIES)
|
|
return 1;
|
|
|
|
if (kvm_vcpu_read_guest_page(vcpu, vmcs12->eptp_list_address >> PAGE_SHIFT,
|
|
&new_eptp, index * 8, 8))
|
|
return 1;
|
|
|
|
/*
|
|
* If the (L2) guest does a vmfunc to the currently
|
|
* active ept pointer, we don't have to do anything else
|
|
*/
|
|
if (vmcs12->ept_pointer != new_eptp) {
|
|
if (!nested_vmx_check_eptp(vcpu, new_eptp))
|
|
return 1;
|
|
|
|
vmcs12->ept_pointer = new_eptp;
|
|
nested_ept_new_eptp(vcpu);
|
|
|
|
if (!nested_cpu_has_vpid(vmcs12))
|
|
kvm_make_request(KVM_REQ_TLB_FLUSH_GUEST, vcpu);
|
|
}
|
|
|
|
return 0;
|
|
}
|
|
|
|
static int handle_vmfunc(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmcs12 *vmcs12;
|
|
u32 function = kvm_rax_read(vcpu);
|
|
|
|
/*
|
|
* VMFUNC is only supported for nested guests, but we always enable the
|
|
* secondary control for simplicity; for non-nested mode, fake that we
|
|
* didn't by injecting #UD.
|
|
*/
|
|
if (!is_guest_mode(vcpu)) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
vmcs12 = get_vmcs12(vcpu);
|
|
|
|
/*
|
|
* #UD on out-of-bounds function has priority over VM-Exit, and VMFUNC
|
|
* is enabled in vmcs02 if and only if it's enabled in vmcs12.
|
|
*/
|
|
if (WARN_ON_ONCE((function > 63) || !nested_cpu_has_vmfunc(vmcs12))) {
|
|
kvm_queue_exception(vcpu, UD_VECTOR);
|
|
return 1;
|
|
}
|
|
|
|
if (!(vmcs12->vm_function_control & BIT_ULL(function)))
|
|
goto fail;
|
|
|
|
switch (function) {
|
|
case 0:
|
|
if (nested_vmx_eptp_switching(vcpu, vmcs12))
|
|
goto fail;
|
|
break;
|
|
default:
|
|
goto fail;
|
|
}
|
|
return kvm_skip_emulated_instruction(vcpu);
|
|
|
|
fail:
|
|
/*
|
|
* This is effectively a reflected VM-Exit, as opposed to a synthesized
|
|
* nested VM-Exit. Pass the original exit reason, i.e. don't hardcode
|
|
* EXIT_REASON_VMFUNC as the exit reason.
|
|
*/
|
|
nested_vmx_vmexit(vcpu, vmx->exit_reason.full,
|
|
vmx_get_intr_info(vcpu),
|
|
vmx_get_exit_qual(vcpu));
|
|
return 1;
|
|
}
|
|
|
|
/*
|
|
* Return true if an IO instruction with the specified port and size should cause
|
|
* a VM-exit into L1.
|
|
*/
|
|
bool nested_vmx_check_io_bitmaps(struct kvm_vcpu *vcpu, unsigned int port,
|
|
int size)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
gpa_t bitmap, last_bitmap;
|
|
u8 b;
|
|
|
|
last_bitmap = (gpa_t)-1;
|
|
b = -1;
|
|
|
|
while (size > 0) {
|
|
if (port < 0x8000)
|
|
bitmap = vmcs12->io_bitmap_a;
|
|
else if (port < 0x10000)
|
|
bitmap = vmcs12->io_bitmap_b;
|
|
else
|
|
return true;
|
|
bitmap += (port & 0x7fff) / 8;
|
|
|
|
if (last_bitmap != bitmap)
|
|
if (kvm_vcpu_read_guest(vcpu, bitmap, &b, 1))
|
|
return true;
|
|
if (b & (1 << (port & 7)))
|
|
return true;
|
|
|
|
port++;
|
|
size--;
|
|
last_bitmap = bitmap;
|
|
}
|
|
|
|
return false;
|
|
}
|
|
|
|
static bool nested_vmx_exit_handled_io(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
unsigned long exit_qualification;
|
|
unsigned short port;
|
|
int size;
|
|
|
|
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_IO_BITMAPS))
|
|
return nested_cpu_has(vmcs12, CPU_BASED_UNCOND_IO_EXITING);
|
|
|
|
exit_qualification = vmx_get_exit_qual(vcpu);
|
|
|
|
port = exit_qualification >> 16;
|
|
size = (exit_qualification & 7) + 1;
|
|
|
|
return nested_vmx_check_io_bitmaps(vcpu, port, size);
|
|
}
|
|
|
|
/*
|
|
* Return 1 if we should exit from L2 to L1 to handle an MSR access,
|
|
* rather than handle it ourselves in L0. I.e., check whether L1 expressed
|
|
* disinterest in the current event (read or write a specific MSR) by using an
|
|
* MSR bitmap. This may be the case even when L0 doesn't use MSR bitmaps.
|
|
*/
|
|
static bool nested_vmx_exit_handled_msr(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12,
|
|
union vmx_exit_reason exit_reason)
|
|
{
|
|
u32 msr_index = kvm_rcx_read(vcpu);
|
|
gpa_t bitmap;
|
|
|
|
if (!nested_cpu_has(vmcs12, CPU_BASED_USE_MSR_BITMAPS))
|
|
return true;
|
|
|
|
/*
|
|
* The MSR_BITMAP page is divided into four 1024-byte bitmaps,
|
|
* for the four combinations of read/write and low/high MSR numbers.
|
|
* First we need to figure out which of the four to use:
|
|
*/
|
|
bitmap = vmcs12->msr_bitmap;
|
|
if (exit_reason.basic == EXIT_REASON_MSR_WRITE)
|
|
bitmap += 2048;
|
|
if (msr_index >= 0xc0000000) {
|
|
msr_index -= 0xc0000000;
|
|
bitmap += 1024;
|
|
}
|
|
|
|
/* Then read the msr_index'th bit from this bitmap: */
|
|
if (msr_index < 1024*8) {
|
|
unsigned char b;
|
|
if (kvm_vcpu_read_guest(vcpu, bitmap + msr_index/8, &b, 1))
|
|
return true;
|
|
return 1 & (b >> (msr_index & 7));
|
|
} else
|
|
return true; /* let L1 handle the wrong parameter */
|
|
}
|
|
|
|
/*
|
|
* Return 1 if we should exit from L2 to L1 to handle a CR access exit,
|
|
* rather than handle it ourselves in L0. I.e., check if L1 wanted to
|
|
* intercept (via guest_host_mask etc.) the current event.
|
|
*/
|
|
static bool nested_vmx_exit_handled_cr(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
unsigned long exit_qualification = vmx_get_exit_qual(vcpu);
|
|
int cr = exit_qualification & 15;
|
|
int reg;
|
|
unsigned long val;
|
|
|
|
switch ((exit_qualification >> 4) & 3) {
|
|
case 0: /* mov to cr */
|
|
reg = (exit_qualification >> 8) & 15;
|
|
val = kvm_register_read(vcpu, reg);
|
|
switch (cr) {
|
|
case 0:
|
|
if (vmcs12->cr0_guest_host_mask &
|
|
(val ^ vmcs12->cr0_read_shadow))
|
|
return true;
|
|
break;
|
|
case 3:
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_CR3_LOAD_EXITING))
|
|
return true;
|
|
break;
|
|
case 4:
|
|
if (vmcs12->cr4_guest_host_mask &
|
|
(vmcs12->cr4_read_shadow ^ val))
|
|
return true;
|
|
break;
|
|
case 8:
|
|
if (nested_cpu_has(vmcs12, CPU_BASED_CR8_LOAD_EXITING))
|
|
return true;
|
|
break;
|
|
}
|
|
break;
|
|
case 2: /* clts */
|
|
if ((vmcs12->cr0_guest_host_mask & X86_CR0_TS) &&
|
|
(vmcs12->cr0_read_shadow & X86_CR0_TS))
|
|
return true;
|
|
break;
|
|
case 1: /* mov from cr */
|
|
switch (cr) {
|
|
case 3:
|
|
if (vmcs12->cpu_based_vm_exec_control &
|
|
CPU_BASED_CR3_STORE_EXITING)
|
|
return true;
|
|
break;
|
|
case 8:
|
|
if (vmcs12->cpu_based_vm_exec_control &
|
|
CPU_BASED_CR8_STORE_EXITING)
|
|
return true;
|
|
break;
|
|
}
|
|
break;
|
|
case 3: /* lmsw */
|
|
/*
|
|
* lmsw can change bits 1..3 of cr0, and only set bit 0 of
|
|
* cr0. Other attempted changes are ignored, with no exit.
|
|
*/
|
|
val = (exit_qualification >> LMSW_SOURCE_DATA_SHIFT) & 0x0f;
|
|
if (vmcs12->cr0_guest_host_mask & 0xe &
|
|
(val ^ vmcs12->cr0_read_shadow))
|
|
return true;
|
|
if ((vmcs12->cr0_guest_host_mask & 0x1) &&
|
|
!(vmcs12->cr0_read_shadow & 0x1) &&
|
|
(val & 0x1))
|
|
return true;
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
static bool nested_vmx_exit_handled_encls(struct kvm_vcpu *vcpu,
|
|
struct vmcs12 *vmcs12)
|
|
{
|
|
u32 encls_leaf;
|
|
|
|
if (!guest_cpuid_has(vcpu, X86_FEATURE_SGX) ||
|
|
!nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENCLS_EXITING))
|
|
return false;
|
|
|
|
encls_leaf = kvm_rax_read(vcpu);
|
|
if (encls_leaf > 62)
|
|
encls_leaf = 63;
|
|
return vmcs12->encls_exiting_bitmap & BIT_ULL(encls_leaf);
|
|
}
|
|
|
|
/*
 * Return true if a VMREAD/VMWRITE exit from L2 should be forwarded to L1.
 *
 * @bitmap is the guest-physical address of L1's VMREAD or VMWRITE bitmap.
 * The exit is forwarded when L1 does not use VMCS shadowing, when the field
 * encoding does not fit in 15 bits, when the bitmap byte cannot be read from
 * guest memory, or when the field's bit in the bitmap is set.
 */
static bool nested_vmx_exit_handled_vmcs_access(struct kvm_vcpu *vcpu,
	struct vmcs12 *vmcs12, gpa_t bitmap)
{
	unsigned long encoding;
	u32 instr_info;
	u8 bits;

	if (!nested_cpu_has_shadow_vmcs(vmcs12))
		return true;

	/* Bits 31:28 of the instruction info name the register holding the field. */
	instr_info = vmcs_read32(VMX_INSTRUCTION_INFO);
	encoding = kvm_register_read(vcpu, (instr_info >> 28) & 0xf);

	/* Out-of-range fields always cause a VM exit from L2 to L1. */
	if (encoding >> 15)
		return true;

	if (kvm_vcpu_read_guest(vcpu, bitmap + encoding / 8, &bits, 1))
		return true;

	return 1 & (bits >> (encoding & 7));
}
|
|
|
|
static bool nested_vmx_exit_handled_mtf(struct vmcs12 *vmcs12)
|
|
{
|
|
u32 entry_intr_info = vmcs12->vm_entry_intr_info_field;
|
|
|
|
if (nested_cpu_has_mtf(vmcs12))
|
|
return true;
|
|
|
|
/*
|
|
* An MTF VM-exit may be injected into the guest by setting the
|
|
* interruption-type to 7 (other event) and the vector field to 0. Such
|
|
* is the case regardless of the 'monitor trap flag' VM-execution
|
|
* control.
|
|
*/
|
|
return entry_intr_info == (INTR_INFO_VALID_MASK
|
|
| INTR_TYPE_OTHER_EVENT);
|
|
}
|
|
|
|
/*
|
|
* Return true if L0 wants to handle an exit from L2 regardless of whether or not
|
|
* L1 wants the exit. Only call this when in is_guest_mode (L2).
|
|
*/
|
|
static bool nested_vmx_l0_wants_exit(struct kvm_vcpu *vcpu,
|
|
union vmx_exit_reason exit_reason)
|
|
{
|
|
u32 intr_info;
|
|
|
|
switch ((u16)exit_reason.basic) {
|
|
case EXIT_REASON_EXCEPTION_NMI:
|
|
intr_info = vmx_get_intr_info(vcpu);
|
|
if (is_nmi(intr_info))
|
|
return true;
|
|
else if (is_page_fault(intr_info))
|
|
return vcpu->arch.apf.host_apf_flags ||
|
|
vmx_need_pf_intercept(vcpu);
|
|
else if (is_debug(intr_info) &&
|
|
vcpu->guest_debug &
|
|
(KVM_GUESTDBG_SINGLESTEP | KVM_GUESTDBG_USE_HW_BP))
|
|
return true;
|
|
else if (is_breakpoint(intr_info) &&
|
|
vcpu->guest_debug & KVM_GUESTDBG_USE_SW_BP)
|
|
return true;
|
|
else if (is_alignment_check(intr_info) &&
|
|
!vmx_guest_inject_ac(vcpu))
|
|
return true;
|
|
return false;
|
|
case EXIT_REASON_EXTERNAL_INTERRUPT:
|
|
return true;
|
|
case EXIT_REASON_MCE_DURING_VMENTRY:
|
|
return true;
|
|
case EXIT_REASON_EPT_VIOLATION:
|
|
/*
|
|
* L0 always deals with the EPT violation. If nested EPT is
|
|
* used, and the nested mmu code discovers that the address is
|
|
* missing in the guest EPT table (EPT12), the EPT violation
|
|
* will be injected with nested_ept_inject_page_fault()
|
|
*/
|
|
return true;
|
|
case EXIT_REASON_EPT_MISCONFIG:
|
|
/*
|
|
* L2 never uses directly L1's EPT, but rather L0's own EPT
|
|
* table (shadow on EPT) or a merged EPT table that L0 built
|
|
* (EPT on EPT). So any problems with the structure of the
|
|
* table is L0's fault.
|
|
*/
|
|
return true;
|
|
case EXIT_REASON_PREEMPTION_TIMER:
|
|
return true;
|
|
case EXIT_REASON_PML_FULL:
|
|
/*
|
|
* PML is emulated for an L1 VMM and should never be enabled in
|
|
* vmcs02, always "handle" PML_FULL by exiting to userspace.
|
|
*/
|
|
return true;
|
|
case EXIT_REASON_VMFUNC:
|
|
/* VM functions are emulated through L2->L0 vmexits. */
|
|
return true;
|
|
case EXIT_REASON_BUS_LOCK:
|
|
/*
|
|
* At present, bus lock VM exit is never exposed to L1.
|
|
* Handle L2's bus locks in L0 directly.
|
|
*/
|
|
return true;
|
|
default:
|
|
break;
|
|
}
|
|
return false;
|
|
}
|
|
|
|
/*
|
|
* Return 1 if L1 wants to intercept an exit from L2. Only call this when in
|
|
* is_guest_mode (L2).
|
|
*/
|
|
static bool nested_vmx_l1_wants_exit(struct kvm_vcpu *vcpu,
|
|
union vmx_exit_reason exit_reason)
|
|
{
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
u32 intr_info;
|
|
|
|
switch ((u16)exit_reason.basic) {
|
|
case EXIT_REASON_EXCEPTION_NMI:
|
|
intr_info = vmx_get_intr_info(vcpu);
|
|
if (is_nmi(intr_info))
|
|
return true;
|
|
else if (is_page_fault(intr_info))
|
|
return true;
|
|
return vmcs12->exception_bitmap &
|
|
(1u << (intr_info & INTR_INFO_VECTOR_MASK));
|
|
case EXIT_REASON_EXTERNAL_INTERRUPT:
|
|
return nested_exit_on_intr(vcpu);
|
|
case EXIT_REASON_TRIPLE_FAULT:
|
|
return true;
|
|
case EXIT_REASON_INTERRUPT_WINDOW:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_INTR_WINDOW_EXITING);
|
|
case EXIT_REASON_NMI_WINDOW:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_NMI_WINDOW_EXITING);
|
|
case EXIT_REASON_TASK_SWITCH:
|
|
return true;
|
|
case EXIT_REASON_CPUID:
|
|
return true;
|
|
case EXIT_REASON_HLT:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_HLT_EXITING);
|
|
case EXIT_REASON_INVD:
|
|
return true;
|
|
case EXIT_REASON_INVLPG:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
|
|
case EXIT_REASON_RDPMC:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_RDPMC_EXITING);
|
|
case EXIT_REASON_RDRAND:
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDRAND_EXITING);
|
|
case EXIT_REASON_RDSEED:
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_RDSEED_EXITING);
|
|
case EXIT_REASON_RDTSC: case EXIT_REASON_RDTSCP:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_RDTSC_EXITING);
|
|
case EXIT_REASON_VMREAD:
|
|
return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
|
|
vmcs12->vmread_bitmap);
|
|
case EXIT_REASON_VMWRITE:
|
|
return nested_vmx_exit_handled_vmcs_access(vcpu, vmcs12,
|
|
vmcs12->vmwrite_bitmap);
|
|
case EXIT_REASON_VMCALL: case EXIT_REASON_VMCLEAR:
|
|
case EXIT_REASON_VMLAUNCH: case EXIT_REASON_VMPTRLD:
|
|
case EXIT_REASON_VMPTRST: case EXIT_REASON_VMRESUME:
|
|
case EXIT_REASON_VMOFF: case EXIT_REASON_VMON:
|
|
case EXIT_REASON_INVEPT: case EXIT_REASON_INVVPID:
|
|
/*
|
|
* VMX instructions trap unconditionally. This allows L1 to
|
|
* emulate them for its L2 guest, i.e., allows 3-level nesting!
|
|
*/
|
|
return true;
|
|
case EXIT_REASON_CR_ACCESS:
|
|
return nested_vmx_exit_handled_cr(vcpu, vmcs12);
|
|
case EXIT_REASON_DR_ACCESS:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_MOV_DR_EXITING);
|
|
case EXIT_REASON_IO_INSTRUCTION:
|
|
return nested_vmx_exit_handled_io(vcpu, vmcs12);
|
|
case EXIT_REASON_GDTR_IDTR: case EXIT_REASON_LDTR_TR:
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_DESC);
|
|
case EXIT_REASON_MSR_READ:
|
|
case EXIT_REASON_MSR_WRITE:
|
|
return nested_vmx_exit_handled_msr(vcpu, vmcs12, exit_reason);
|
|
case EXIT_REASON_INVALID_STATE:
|
|
return true;
|
|
case EXIT_REASON_MWAIT_INSTRUCTION:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_MWAIT_EXITING);
|
|
case EXIT_REASON_MONITOR_TRAP_FLAG:
|
|
return nested_vmx_exit_handled_mtf(vmcs12);
|
|
case EXIT_REASON_MONITOR_INSTRUCTION:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_MONITOR_EXITING);
|
|
case EXIT_REASON_PAUSE_INSTRUCTION:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_PAUSE_EXITING) ||
|
|
nested_cpu_has2(vmcs12,
|
|
SECONDARY_EXEC_PAUSE_LOOP_EXITING);
|
|
case EXIT_REASON_MCE_DURING_VMENTRY:
|
|
return true;
|
|
case EXIT_REASON_TPR_BELOW_THRESHOLD:
|
|
return nested_cpu_has(vmcs12, CPU_BASED_TPR_SHADOW);
|
|
case EXIT_REASON_APIC_ACCESS:
|
|
case EXIT_REASON_APIC_WRITE:
|
|
case EXIT_REASON_EOI_INDUCED:
|
|
/*
|
|
* The controls for "virtualize APIC accesses," "APIC-
|
|
* register virtualization," and "virtual-interrupt
|
|
* delivery" only come from vmcs12.
|
|
*/
|
|
return true;
|
|
case EXIT_REASON_INVPCID:
|
|
return
|
|
nested_cpu_has2(vmcs12, SECONDARY_EXEC_ENABLE_INVPCID) &&
|
|
nested_cpu_has(vmcs12, CPU_BASED_INVLPG_EXITING);
|
|
case EXIT_REASON_WBINVD:
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_WBINVD_EXITING);
|
|
case EXIT_REASON_XSETBV:
|
|
return true;
|
|
case EXIT_REASON_XSAVES: case EXIT_REASON_XRSTORS:
|
|
/*
|
|
* This should never happen, since it is not possible to
|
|
* set XSS to a non-zero value---neither in L1 nor in L2.
|
|
* If if it were, XSS would have to be checked against
|
|
* the XSS exit bitmap in vmcs12.
|
|
*/
|
|
return nested_cpu_has2(vmcs12, SECONDARY_EXEC_XSAVES);
|
|
case EXIT_REASON_UMWAIT:
|
|
case EXIT_REASON_TPAUSE:
|
|
return nested_cpu_has2(vmcs12,
|
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE);
|
|
case EXIT_REASON_ENCLS:
|
|
return nested_vmx_exit_handled_encls(vcpu, vmcs12);
|
|
default:
|
|
return true;
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Conditionally reflect a VM-Exit into L1. Returns %true if the VM-Exit was
|
|
* reflected into L1.
|
|
*/
|
|
bool nested_vmx_reflect_vmexit(struct kvm_vcpu *vcpu)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
union vmx_exit_reason exit_reason = vmx->exit_reason;
|
|
unsigned long exit_qual;
|
|
u32 exit_intr_info;
|
|
|
|
WARN_ON_ONCE(vmx->nested.nested_run_pending);
|
|
|
|
/*
|
|
* Late nested VM-Fail shares the same flow as nested VM-Exit since KVM
|
|
* has already loaded L2's state.
|
|
*/
|
|
if (unlikely(vmx->fail)) {
|
|
trace_kvm_nested_vmenter_failed(
|
|
"hardware VM-instruction error: ",
|
|
vmcs_read32(VM_INSTRUCTION_ERROR));
|
|
exit_intr_info = 0;
|
|
exit_qual = 0;
|
|
goto reflect_vmexit;
|
|
}
|
|
|
|
trace_kvm_nested_vmexit(exit_reason.full, vcpu, KVM_ISA_VMX);
|
|
|
|
/* If L0 (KVM) wants the exit, it trumps L1's desires. */
|
|
if (nested_vmx_l0_wants_exit(vcpu, exit_reason))
|
|
return false;
|
|
|
|
/* If L1 doesn't want the exit, handle it in L0. */
|
|
if (!nested_vmx_l1_wants_exit(vcpu, exit_reason))
|
|
return false;
|
|
|
|
/*
|
|
* vmcs.VM_EXIT_INTR_INFO is only valid for EXCEPTION_NMI exits. For
|
|
* EXTERNAL_INTERRUPT, the value for vmcs12->vm_exit_intr_info would
|
|
* need to be synthesized by querying the in-kernel LAPIC, but external
|
|
* interrupts are never reflected to L1 so it's a non-issue.
|
|
*/
|
|
exit_intr_info = vmx_get_intr_info(vcpu);
|
|
if (is_exception_with_error_code(exit_intr_info)) {
|
|
struct vmcs12 *vmcs12 = get_vmcs12(vcpu);
|
|
|
|
vmcs12->vm_exit_intr_error_code =
|
|
vmcs_read32(VM_EXIT_INTR_ERROR_CODE);
|
|
}
|
|
exit_qual = vmx_get_exit_qual(vcpu);
|
|
|
|
reflect_vmexit:
|
|
nested_vmx_vmexit(vcpu, exit_reason.full, exit_intr_info, exit_qual);
|
|
return true;
|
|
}
|
|
|
|
static int vmx_get_nested_state(struct kvm_vcpu *vcpu,
|
|
struct kvm_nested_state __user *user_kvm_nested_state,
|
|
u32 user_data_size)
|
|
{
|
|
struct vcpu_vmx *vmx;
|
|
struct vmcs12 *vmcs12;
|
|
struct kvm_nested_state kvm_state = {
|
|
.flags = 0,
|
|
.format = KVM_STATE_NESTED_FORMAT_VMX,
|
|
.size = sizeof(kvm_state),
|
|
.hdr.vmx.flags = 0,
|
|
.hdr.vmx.vmxon_pa = -1ull,
|
|
.hdr.vmx.vmcs12_pa = -1ull,
|
|
.hdr.vmx.preemption_timer_deadline = 0,
|
|
};
|
|
struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
|
|
&user_kvm_nested_state->data.vmx[0];
|
|
|
|
if (!vcpu)
|
|
return kvm_state.size + sizeof(*user_vmx_nested_state);
|
|
|
|
vmx = to_vmx(vcpu);
|
|
vmcs12 = get_vmcs12(vcpu);
|
|
|
|
if (nested_vmx_allowed(vcpu) &&
|
|
(vmx->nested.vmxon || vmx->nested.smm.vmxon)) {
|
|
kvm_state.hdr.vmx.vmxon_pa = vmx->nested.vmxon_ptr;
|
|
kvm_state.hdr.vmx.vmcs12_pa = vmx->nested.current_vmptr;
|
|
|
|
if (vmx_has_valid_vmcs12(vcpu)) {
|
|
kvm_state.size += sizeof(user_vmx_nested_state->vmcs12);
|
|
|
|
/* 'hv_evmcs_vmptr' can also be EVMPTR_MAP_PENDING here */
|
|
if (vmx->nested.hv_evmcs_vmptr != EVMPTR_INVALID)
|
|
kvm_state.flags |= KVM_STATE_NESTED_EVMCS;
|
|
|
|
if (is_guest_mode(vcpu) &&
|
|
nested_cpu_has_shadow_vmcs(vmcs12) &&
|
|
vmcs12->vmcs_link_pointer != -1ull)
|
|
kvm_state.size += sizeof(user_vmx_nested_state->shadow_vmcs12);
|
|
}
|
|
|
|
if (vmx->nested.smm.vmxon)
|
|
kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_VMXON;
|
|
|
|
if (vmx->nested.smm.guest_mode)
|
|
kvm_state.hdr.vmx.smm.flags |= KVM_STATE_NESTED_SMM_GUEST_MODE;
|
|
|
|
if (is_guest_mode(vcpu)) {
|
|
kvm_state.flags |= KVM_STATE_NESTED_GUEST_MODE;
|
|
|
|
if (vmx->nested.nested_run_pending)
|
|
kvm_state.flags |= KVM_STATE_NESTED_RUN_PENDING;
|
|
|
|
if (vmx->nested.mtf_pending)
|
|
kvm_state.flags |= KVM_STATE_NESTED_MTF_PENDING;
|
|
|
|
if (nested_cpu_has_preemption_timer(vmcs12) &&
|
|
vmx->nested.has_preemption_timer_deadline) {
|
|
kvm_state.hdr.vmx.flags |=
|
|
KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE;
|
|
kvm_state.hdr.vmx.preemption_timer_deadline =
|
|
vmx->nested.preemption_timer_deadline;
|
|
}
|
|
}
|
|
}
|
|
|
|
if (user_data_size < kvm_state.size)
|
|
goto out;
|
|
|
|
if (copy_to_user(user_kvm_nested_state, &kvm_state, sizeof(kvm_state)))
|
|
return -EFAULT;
|
|
|
|
if (!vmx_has_valid_vmcs12(vcpu))
|
|
goto out;
|
|
|
|
/*
|
|
* When running L2, the authoritative vmcs12 state is in the
|
|
* vmcs02. When running L1, the authoritative vmcs12 state is
|
|
* in the shadow or enlightened vmcs linked to vmcs01, unless
|
|
* need_vmcs12_to_shadow_sync is set, in which case, the authoritative
|
|
* vmcs12 state is in the vmcs12 already.
|
|
*/
|
|
if (is_guest_mode(vcpu)) {
|
|
sync_vmcs02_to_vmcs12(vcpu, vmcs12);
|
|
sync_vmcs02_to_vmcs12_rare(vcpu, vmcs12);
|
|
} else {
|
|
copy_vmcs02_to_vmcs12_rare(vcpu, get_vmcs12(vcpu));
|
|
if (!vmx->nested.need_vmcs12_to_shadow_sync) {
|
|
if (evmptr_is_valid(vmx->nested.hv_evmcs_vmptr))
|
|
/*
|
|
* L1 hypervisor is not obliged to keep eVMCS
|
|
* clean fields data always up-to-date while
|
|
* not in guest mode, 'hv_clean_fields' is only
|
|
* supposed to be actual upon vmentry so we need
|
|
* to ignore it here and do full copy.
|
|
*/
|
|
copy_enlightened_to_vmcs12(vmx, 0);
|
|
else if (enable_shadow_vmcs)
|
|
copy_shadow_to_vmcs12(vmx);
|
|
}
|
|
}
|
|
|
|
BUILD_BUG_ON(sizeof(user_vmx_nested_state->vmcs12) < VMCS12_SIZE);
|
|
BUILD_BUG_ON(sizeof(user_vmx_nested_state->shadow_vmcs12) < VMCS12_SIZE);
|
|
|
|
/*
|
|
* Copy over the full allocated size of vmcs12 rather than just the size
|
|
* of the struct.
|
|
*/
|
|
if (copy_to_user(user_vmx_nested_state->vmcs12, vmcs12, VMCS12_SIZE))
|
|
return -EFAULT;
|
|
|
|
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
|
|
vmcs12->vmcs_link_pointer != -1ull) {
|
|
if (copy_to_user(user_vmx_nested_state->shadow_vmcs12,
|
|
get_shadow_vmcs12(vcpu), VMCS12_SIZE))
|
|
return -EFAULT;
|
|
}
|
|
out:
|
|
return kvm_state.size;
|
|
}
|
|
|
|
void vmx_leave_nested(struct kvm_vcpu *vcpu)
|
|
{
|
|
if (is_guest_mode(vcpu)) {
|
|
to_vmx(vcpu)->nested.nested_run_pending = 0;
|
|
nested_vmx_vmexit(vcpu, -1, 0, 0);
|
|
}
|
|
free_nested(vcpu);
|
|
}
|
|
|
|
static int vmx_set_nested_state(struct kvm_vcpu *vcpu,
|
|
struct kvm_nested_state __user *user_kvm_nested_state,
|
|
struct kvm_nested_state *kvm_state)
|
|
{
|
|
struct vcpu_vmx *vmx = to_vmx(vcpu);
|
|
struct vmcs12 *vmcs12;
|
|
enum vm_entry_failure_code ignored;
|
|
struct kvm_vmx_nested_state_data __user *user_vmx_nested_state =
|
|
&user_kvm_nested_state->data.vmx[0];
|
|
int ret;
|
|
|
|
if (kvm_state->format != KVM_STATE_NESTED_FORMAT_VMX)
|
|
return -EINVAL;
|
|
|
|
if (kvm_state->hdr.vmx.vmxon_pa == -1ull) {
|
|
if (kvm_state->hdr.vmx.smm.flags)
|
|
return -EINVAL;
|
|
|
|
if (kvm_state->hdr.vmx.vmcs12_pa != -1ull)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* KVM_STATE_NESTED_EVMCS used to signal that KVM should
|
|
* enable eVMCS capability on vCPU. However, since then
|
|
* code was changed such that flag signals vmcs12 should
|
|
* be copied into eVMCS in guest memory.
|
|
*
|
|
* To preserve backwards compatability, allow user
|
|
* to set this flag even when there is no VMXON region.
|
|
*/
|
|
if (kvm_state->flags & ~KVM_STATE_NESTED_EVMCS)
|
|
return -EINVAL;
|
|
} else {
|
|
if (!nested_vmx_allowed(vcpu))
|
|
return -EINVAL;
|
|
|
|
if (!page_address_valid(vcpu, kvm_state->hdr.vmx.vmxon_pa))
|
|
return -EINVAL;
|
|
}
|
|
|
|
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
|
|
(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
|
|
return -EINVAL;
|
|
|
|
if (kvm_state->hdr.vmx.smm.flags &
|
|
~(KVM_STATE_NESTED_SMM_GUEST_MODE | KVM_STATE_NESTED_SMM_VMXON))
|
|
return -EINVAL;
|
|
|
|
if (kvm_state->hdr.vmx.flags & ~KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE)
|
|
return -EINVAL;
|
|
|
|
/*
|
|
* SMM temporarily disables VMX, so we cannot be in guest mode,
|
|
* nor can VMLAUNCH/VMRESUME be pending. Outside SMM, SMM flags
|
|
* must be zero.
|
|
*/
|
|
if (is_smm(vcpu) ?
|
|
(kvm_state->flags &
|
|
(KVM_STATE_NESTED_GUEST_MODE | KVM_STATE_NESTED_RUN_PENDING))
|
|
: kvm_state->hdr.vmx.smm.flags)
|
|
return -EINVAL;
|
|
|
|
if ((kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE) &&
|
|
!(kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON))
|
|
return -EINVAL;
|
|
|
|
if ((kvm_state->flags & KVM_STATE_NESTED_EVMCS) &&
|
|
(!nested_vmx_allowed(vcpu) || !vmx->nested.enlightened_vmcs_enabled))
|
|
return -EINVAL;
|
|
|
|
vmx_leave_nested(vcpu);
|
|
|
|
if (kvm_state->hdr.vmx.vmxon_pa == -1ull)
|
|
return 0;
|
|
|
|
vmx->nested.vmxon_ptr = kvm_state->hdr.vmx.vmxon_pa;
|
|
ret = enter_vmx_operation(vcpu);
|
|
if (ret)
|
|
return ret;
|
|
|
|
/* Empty 'VMXON' state is permitted if no VMCS loaded */
|
|
if (kvm_state->size < sizeof(*kvm_state) + sizeof(*vmcs12)) {
|
|
/* See vmx_has_valid_vmcs12. */
|
|
if ((kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE) ||
|
|
(kvm_state->flags & KVM_STATE_NESTED_EVMCS) ||
|
|
(kvm_state->hdr.vmx.vmcs12_pa != -1ull))
|
|
return -EINVAL;
|
|
else
|
|
return 0;
|
|
}
|
|
|
|
if (kvm_state->hdr.vmx.vmcs12_pa != -1ull) {
|
|
if (kvm_state->hdr.vmx.vmcs12_pa == kvm_state->hdr.vmx.vmxon_pa ||
|
|
!page_address_valid(vcpu, kvm_state->hdr.vmx.vmcs12_pa))
|
|
return -EINVAL;
|
|
|
|
set_current_vmptr(vmx, kvm_state->hdr.vmx.vmcs12_pa);
|
|
} else if (kvm_state->flags & KVM_STATE_NESTED_EVMCS) {
|
|
/*
|
|
* nested_vmx_handle_enlightened_vmptrld() cannot be called
|
|
* directly from here as HV_X64_MSR_VP_ASSIST_PAGE may not be
|
|
* restored yet. EVMCS will be mapped from
|
|
* nested_get_vmcs12_pages().
|
|
*/
|
|
vmx->nested.hv_evmcs_vmptr = EVMPTR_MAP_PENDING;
|
|
kvm_make_request(KVM_REQ_GET_NESTED_STATE_PAGES, vcpu);
|
|
} else {
|
|
return -EINVAL;
|
|
}
|
|
|
|
if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_VMXON) {
|
|
vmx->nested.smm.vmxon = true;
|
|
vmx->nested.vmxon = false;
|
|
|
|
if (kvm_state->hdr.vmx.smm.flags & KVM_STATE_NESTED_SMM_GUEST_MODE)
|
|
vmx->nested.smm.guest_mode = true;
|
|
}
|
|
|
|
vmcs12 = get_vmcs12(vcpu);
|
|
if (copy_from_user(vmcs12, user_vmx_nested_state->vmcs12, sizeof(*vmcs12)))
|
|
return -EFAULT;
|
|
|
|
if (vmcs12->hdr.revision_id != VMCS12_REVISION)
|
|
return -EINVAL;
|
|
|
|
if (!(kvm_state->flags & KVM_STATE_NESTED_GUEST_MODE))
|
|
return 0;
|
|
|
|
vmx->nested.nested_run_pending =
|
|
!!(kvm_state->flags & KVM_STATE_NESTED_RUN_PENDING);
|
|
|
|
vmx->nested.mtf_pending =
|
|
!!(kvm_state->flags & KVM_STATE_NESTED_MTF_PENDING);
|
|
|
|
ret = -EINVAL;
|
|
if (nested_cpu_has_shadow_vmcs(vmcs12) &&
|
|
vmcs12->vmcs_link_pointer != -1ull) {
|
|
struct vmcs12 *shadow_vmcs12 = get_shadow_vmcs12(vcpu);
|
|
|
|
if (kvm_state->size <
|
|
sizeof(*kvm_state) +
|
|
sizeof(user_vmx_nested_state->vmcs12) + sizeof(*shadow_vmcs12))
|
|
goto error_guest_mode;
|
|
|
|
if (copy_from_user(shadow_vmcs12,
|
|
user_vmx_nested_state->shadow_vmcs12,
|
|
sizeof(*shadow_vmcs12))) {
|
|
ret = -EFAULT;
|
|
goto error_guest_mode;
|
|
}
|
|
|
|
if (shadow_vmcs12->hdr.revision_id != VMCS12_REVISION ||
|
|
!shadow_vmcs12->hdr.shadow_vmcs)
|
|
goto error_guest_mode;
|
|
}
|
|
|
|
vmx->nested.has_preemption_timer_deadline = false;
|
|
if (kvm_state->hdr.vmx.flags & KVM_STATE_VMX_PREEMPTION_TIMER_DEADLINE) {
|
|
vmx->nested.has_preemption_timer_deadline = true;
|
|
vmx->nested.preemption_timer_deadline =
|
|
kvm_state->hdr.vmx.preemption_timer_deadline;
|
|
}
|
|
|
|
if (nested_vmx_check_controls(vcpu, vmcs12) ||
|
|
nested_vmx_check_host_state(vcpu, vmcs12) ||
|
|
nested_vmx_check_guest_state(vcpu, vmcs12, &ignored))
|
|
goto error_guest_mode;
|
|
|
|
vmx->nested.dirty_vmcs12 = true;
|
|
ret = nested_vmx_enter_non_root_mode(vcpu, false);
|
|
if (ret)
|
|
goto error_guest_mode;
|
|
|
|
return 0;
|
|
|
|
error_guest_mode:
|
|
vmx->nested.nested_run_pending = 0;
|
|
return ret;
|
|
}
|
|
|
|
void nested_vmx_set_vmcs_shadowing_bitmap(void)
|
|
{
|
|
if (enable_shadow_vmcs) {
|
|
vmcs_write64(VMREAD_BITMAP, __pa(vmx_vmread_bitmap));
|
|
vmcs_write64(VMWRITE_BITMAP, __pa(vmx_vmwrite_bitmap));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* Indexing into the vmcs12 uses the VMCS encoding rotated left by 6. Undo
|
|
* that madness to get the encoding for comparison.
|
|
*/
|
|
#define VMCS12_IDX_TO_ENC(idx) ((u16)(((u16)(idx) >> 6) | ((u16)(idx) << 10)))
|
|
|
|
static u64 nested_vmx_calc_vmcs_enum_msr(void)
|
|
{
|
|
/*
|
|
* Note these are the so called "index" of the VMCS field encoding, not
|
|
* the index into vmcs12.
|
|
*/
|
|
unsigned int max_idx, idx;
|
|
int i;
|
|
|
|
/*
|
|
* For better or worse, KVM allows VMREAD/VMWRITE to all fields in
|
|
* vmcs12, regardless of whether or not the associated feature is
|
|
* exposed to L1. Simply find the field with the highest index.
|
|
*/
|
|
max_idx = 0;
|
|
for (i = 0; i < nr_vmcs12_fields; i++) {
|
|
/* The vmcs12 table is very, very sparsely populated. */
|
|
if (!vmcs_field_to_offset_table[i])
|
|
continue;
|
|
|
|
idx = vmcs_field_index(VMCS12_IDX_TO_ENC(i));
|
|
if (idx > max_idx)
|
|
max_idx = idx;
|
|
}
|
|
|
|
return (u64)max_idx << VMCS_FIELD_INDEX_SHIFT;
|
|
}
|
|
|
|
/*
|
|
* nested_vmx_setup_ctls_msrs() sets up variables containing the values to be
|
|
* returned for the various VMX controls MSRs when nested VMX is enabled.
|
|
* The same values should also be used to verify that vmcs12 control fields are
|
|
* valid during nested entry from L1 to L2.
|
|
* Each of these control msrs has a low and high 32-bit half: A low bit is on
|
|
* if the corresponding bit in the (32-bit) control field *must* be on, and a
|
|
* bit in the high half is on if the corresponding bit in the control field
|
|
* may be on. See also vmx_control_verify().
|
|
*/
|
|
void nested_vmx_setup_ctls_msrs(struct nested_vmx_msrs *msrs, u32 ept_caps)
|
|
{
|
|
/*
|
|
* Note that as a general rule, the high half of the MSRs (bits in
|
|
* the control fields which may be 1) should be initialized by the
|
|
* intersection of the underlying hardware's MSR (i.e., features which
|
|
* can be supported) and the list of features we want to expose -
|
|
* because they are known to be properly supported in our code.
|
|
* Also, usually, the low half of the MSRs (bits which must be 1) can
|
|
* be set to 0, meaning that L1 may turn off any of these bits. The
|
|
* reason is that if one of these bits is necessary, it will appear
|
|
* in vmcs01 and prepare_vmcs02, when it bitwise-or's the control
|
|
* fields of vmcs01 and vmcs02, will turn these bits off - and
|
|
* nested_vmx_l1_wants_exit() will not pass related exits to L1.
|
|
* These rules have exceptions below.
|
|
*/
|
|
|
|
/* pin-based controls */
|
|
rdmsr(MSR_IA32_VMX_PINBASED_CTLS,
|
|
msrs->pinbased_ctls_low,
|
|
msrs->pinbased_ctls_high);
|
|
msrs->pinbased_ctls_low |=
|
|
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
msrs->pinbased_ctls_high &=
|
|
PIN_BASED_EXT_INTR_MASK |
|
|
PIN_BASED_NMI_EXITING |
|
|
PIN_BASED_VIRTUAL_NMIS |
|
|
(enable_apicv ? PIN_BASED_POSTED_INTR : 0);
|
|
msrs->pinbased_ctls_high |=
|
|
PIN_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
|
|
PIN_BASED_VMX_PREEMPTION_TIMER;
|
|
|
|
/* exit controls */
|
|
rdmsr(MSR_IA32_VMX_EXIT_CTLS,
|
|
msrs->exit_ctls_low,
|
|
msrs->exit_ctls_high);
|
|
msrs->exit_ctls_low =
|
|
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
|
|
msrs->exit_ctls_high &=
|
|
#ifdef CONFIG_X86_64
|
|
VM_EXIT_HOST_ADDR_SPACE_SIZE |
|
|
#endif
|
|
VM_EXIT_LOAD_IA32_PAT | VM_EXIT_SAVE_IA32_PAT |
|
|
VM_EXIT_CLEAR_BNDCFGS | VM_EXIT_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
msrs->exit_ctls_high |=
|
|
VM_EXIT_ALWAYSON_WITHOUT_TRUE_MSR |
|
|
VM_EXIT_LOAD_IA32_EFER | VM_EXIT_SAVE_IA32_EFER |
|
|
VM_EXIT_SAVE_VMX_PREEMPTION_TIMER | VM_EXIT_ACK_INTR_ON_EXIT;
|
|
|
|
/* We support free control of debug control saving. */
|
|
msrs->exit_ctls_low &= ~VM_EXIT_SAVE_DEBUG_CONTROLS;
|
|
|
|
/* entry controls */
|
|
rdmsr(MSR_IA32_VMX_ENTRY_CTLS,
|
|
msrs->entry_ctls_low,
|
|
msrs->entry_ctls_high);
|
|
msrs->entry_ctls_low =
|
|
VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
msrs->entry_ctls_high &=
|
|
#ifdef CONFIG_X86_64
|
|
VM_ENTRY_IA32E_MODE |
|
|
#endif
|
|
VM_ENTRY_LOAD_IA32_PAT | VM_ENTRY_LOAD_BNDCFGS |
|
|
VM_ENTRY_LOAD_IA32_PERF_GLOBAL_CTRL;
|
|
msrs->entry_ctls_high |=
|
|
(VM_ENTRY_ALWAYSON_WITHOUT_TRUE_MSR | VM_ENTRY_LOAD_IA32_EFER);
|
|
|
|
/* We support free control of debug control loading. */
|
|
msrs->entry_ctls_low &= ~VM_ENTRY_LOAD_DEBUG_CONTROLS;
|
|
|
|
/* cpu-based controls */
|
|
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS,
|
|
msrs->procbased_ctls_low,
|
|
msrs->procbased_ctls_high);
|
|
msrs->procbased_ctls_low =
|
|
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR;
|
|
msrs->procbased_ctls_high &=
|
|
CPU_BASED_INTR_WINDOW_EXITING |
|
|
CPU_BASED_NMI_WINDOW_EXITING | CPU_BASED_USE_TSC_OFFSETTING |
|
|
CPU_BASED_HLT_EXITING | CPU_BASED_INVLPG_EXITING |
|
|
CPU_BASED_MWAIT_EXITING | CPU_BASED_CR3_LOAD_EXITING |
|
|
CPU_BASED_CR3_STORE_EXITING |
|
|
#ifdef CONFIG_X86_64
|
|
CPU_BASED_CR8_LOAD_EXITING | CPU_BASED_CR8_STORE_EXITING |
|
|
#endif
|
|
CPU_BASED_MOV_DR_EXITING | CPU_BASED_UNCOND_IO_EXITING |
|
|
CPU_BASED_USE_IO_BITMAPS | CPU_BASED_MONITOR_TRAP_FLAG |
|
|
CPU_BASED_MONITOR_EXITING | CPU_BASED_RDPMC_EXITING |
|
|
CPU_BASED_RDTSC_EXITING | CPU_BASED_PAUSE_EXITING |
|
|
CPU_BASED_TPR_SHADOW | CPU_BASED_ACTIVATE_SECONDARY_CONTROLS;
|
|
/*
|
|
* We can allow some features even when not supported by the
|
|
* hardware. For example, L1 can specify an MSR bitmap - and we
|
|
* can use it to avoid exits to L1 - even when L0 runs L2
|
|
* without MSR bitmaps.
|
|
*/
|
|
msrs->procbased_ctls_high |=
|
|
CPU_BASED_ALWAYSON_WITHOUT_TRUE_MSR |
|
|
CPU_BASED_USE_MSR_BITMAPS;
|
|
|
|
/* We support free control of CR3 access interception. */
|
|
msrs->procbased_ctls_low &=
|
|
~(CPU_BASED_CR3_LOAD_EXITING | CPU_BASED_CR3_STORE_EXITING);
|
|
|
|
/*
|
|
* secondary cpu-based controls. Do not include those that
|
|
* depend on CPUID bits, they are added later by
|
|
* vmx_vcpu_after_set_cpuid.
|
|
*/
|
|
if (msrs->procbased_ctls_high & CPU_BASED_ACTIVATE_SECONDARY_CONTROLS)
|
|
rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
|
|
msrs->secondary_ctls_low,
|
|
msrs->secondary_ctls_high);
|
|
|
|
msrs->secondary_ctls_low = 0;
|
|
msrs->secondary_ctls_high &=
|
|
SECONDARY_EXEC_DESC |
|
|
SECONDARY_EXEC_ENABLE_RDTSCP |
|
|
SECONDARY_EXEC_VIRTUALIZE_X2APIC_MODE |
|
|
SECONDARY_EXEC_WBINVD_EXITING |
|
|
SECONDARY_EXEC_APIC_REGISTER_VIRT |
|
|
SECONDARY_EXEC_VIRTUAL_INTR_DELIVERY |
|
|
SECONDARY_EXEC_RDRAND_EXITING |
|
|
SECONDARY_EXEC_ENABLE_INVPCID |
|
|
SECONDARY_EXEC_RDSEED_EXITING |
|
|
SECONDARY_EXEC_XSAVES |
|
|
SECONDARY_EXEC_TSC_SCALING |
|
|
SECONDARY_EXEC_ENABLE_USR_WAIT_PAUSE;
|
|
|
|
/*
|
|
* We can emulate "VMCS shadowing," even if the hardware
|
|
* doesn't support it.
|
|
*/
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_SHADOW_VMCS;
|
|
|
|
if (enable_ept) {
|
|
/* nested EPT: emulate EPT also to L1 */
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_ENABLE_EPT;
|
|
msrs->ept_caps =
|
|
VMX_EPT_PAGE_WALK_4_BIT |
|
|
VMX_EPT_PAGE_WALK_5_BIT |
|
|
VMX_EPTP_WB_BIT |
|
|
VMX_EPT_INVEPT_BIT |
|
|
VMX_EPT_EXECUTE_ONLY_BIT;
|
|
|
|
msrs->ept_caps &= ept_caps;
|
|
msrs->ept_caps |= VMX_EPT_EXTENT_GLOBAL_BIT |
|
|
VMX_EPT_EXTENT_CONTEXT_BIT | VMX_EPT_2MB_PAGE_BIT |
|
|
VMX_EPT_1GB_PAGE_BIT;
|
|
if (enable_ept_ad_bits) {
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_ENABLE_PML;
|
|
msrs->ept_caps |= VMX_EPT_AD_BIT;
|
|
}
|
|
}
|
|
|
|
if (cpu_has_vmx_vmfunc()) {
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_ENABLE_VMFUNC;
|
|
/*
|
|
* Advertise EPTP switching unconditionally
|
|
* since we emulate it
|
|
*/
|
|
if (enable_ept)
|
|
msrs->vmfunc_controls =
|
|
VMX_VMFUNC_EPTP_SWITCHING;
|
|
}
|
|
|
|
/*
|
|
* Old versions of KVM use the single-context version without
|
|
* checking for support, so declare that it is supported even
|
|
* though it is treated as global context. The alternative is
|
|
* not failing the single-context invvpid, and it is worse.
|
|
*/
|
|
if (enable_vpid) {
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_ENABLE_VPID;
|
|
msrs->vpid_caps = VMX_VPID_INVVPID_BIT |
|
|
VMX_VPID_EXTENT_SUPPORTED_MASK;
|
|
}
|
|
|
|
if (enable_unrestricted_guest)
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_UNRESTRICTED_GUEST;
|
|
|
|
if (flexpriority_enabled)
|
|
msrs->secondary_ctls_high |=
|
|
SECONDARY_EXEC_VIRTUALIZE_APIC_ACCESSES;
|
|
|
|
if (enable_sgx)
|
|
msrs->secondary_ctls_high |= SECONDARY_EXEC_ENCLS_EXITING;
|
|
|
|
/* miscellaneous data */
|
|
rdmsr(MSR_IA32_VMX_MISC,
|
|
msrs->misc_low,
|
|
msrs->misc_high);
|
|
msrs->misc_low &= VMX_MISC_SAVE_EFER_LMA;
|
|
msrs->misc_low |=
|
|
MSR_IA32_VMX_MISC_VMWRITE_SHADOW_RO_FIELDS |
|
|
VMX_MISC_EMULATED_PREEMPTION_TIMER_RATE |
|
|
VMX_MISC_ACTIVITY_HLT |
|
|
VMX_MISC_ACTIVITY_WAIT_SIPI;
|
|
msrs->misc_high = 0;
|
|
|
|
/*
|
|
* This MSR reports some information about VMX support. We
|
|
* should return information about the VMX we emulate for the
|
|
* guest, and the VMCS structure we give it - not about the
|
|
* VMX support of the underlying hardware.
|
|
*/
|
|
msrs->basic =
|
|
VMCS12_REVISION |
|
|
VMX_BASIC_TRUE_CTLS |
|
|
((u64)VMCS12_SIZE << VMX_BASIC_VMCS_SIZE_SHIFT) |
|
|
(VMX_BASIC_MEM_TYPE_WB << VMX_BASIC_MEM_TYPE_SHIFT);
|
|
|
|
if (cpu_has_vmx_basic_inout())
|
|
msrs->basic |= VMX_BASIC_INOUT;
|
|
|
|
/*
|
|
* These MSRs specify bits which the guest must keep fixed on
|
|
* while L1 is in VMXON mode (in L1's root mode, or running an L2).
|
|
* We picked the standard core2 setting.
|
|
*/
|
|
#define VMXON_CR0_ALWAYSON (X86_CR0_PE | X86_CR0_PG | X86_CR0_NE)
|
|
#define VMXON_CR4_ALWAYSON X86_CR4_VMXE
|
|
msrs->cr0_fixed0 = VMXON_CR0_ALWAYSON;
|
|
msrs->cr4_fixed0 = VMXON_CR4_ALWAYSON;
|
|
|
|
/* These MSRs specify bits which the guest must keep fixed off. */
|
|
rdmsrl(MSR_IA32_VMX_CR0_FIXED1, msrs->cr0_fixed1);
|
|
rdmsrl(MSR_IA32_VMX_CR4_FIXED1, msrs->cr4_fixed1);
|
|
|
|
if (vmx_umip_emulated())
|
|
msrs->cr4_fixed1 |= X86_CR4_UMIP;
|
|
|
|
msrs->vmcs_enum = nested_vmx_calc_vmcs_enum_msr();
|
|
}
|
|
|
|
void nested_vmx_hardware_unsetup(void)
|
|
{
|
|
int i;
|
|
|
|
if (enable_shadow_vmcs) {
|
|
for (i = 0; i < VMX_BITMAP_NR; i++)
|
|
free_page((unsigned long)vmx_bitmap[i]);
|
|
}
|
|
}
|
|
|
|
__init int nested_vmx_hardware_setup(int (*exit_handlers[])(struct kvm_vcpu *))
|
|
{
|
|
int i;
|
|
|
|
if (!cpu_has_vmx_shadow_vmcs())
|
|
enable_shadow_vmcs = 0;
|
|
if (enable_shadow_vmcs) {
|
|
for (i = 0; i < VMX_BITMAP_NR; i++) {
|
|
/*
|
|
* The vmx_bitmap is not tied to a VM and so should
|
|
* not be charged to a memcg.
|
|
*/
|
|
vmx_bitmap[i] = (unsigned long *)
|
|
__get_free_page(GFP_KERNEL);
|
|
if (!vmx_bitmap[i]) {
|
|
nested_vmx_hardware_unsetup();
|
|
return -ENOMEM;
|
|
}
|
|
}
|
|
|
|
init_vmcs_shadow_fields();
|
|
}
|
|
|
|
exit_handlers[EXIT_REASON_VMCLEAR] = handle_vmclear;
|
|
exit_handlers[EXIT_REASON_VMLAUNCH] = handle_vmlaunch;
|
|
exit_handlers[EXIT_REASON_VMPTRLD] = handle_vmptrld;
|
|
exit_handlers[EXIT_REASON_VMPTRST] = handle_vmptrst;
|
|
exit_handlers[EXIT_REASON_VMREAD] = handle_vmread;
|
|
exit_handlers[EXIT_REASON_VMRESUME] = handle_vmresume;
|
|
exit_handlers[EXIT_REASON_VMWRITE] = handle_vmwrite;
|
|
exit_handlers[EXIT_REASON_VMOFF] = handle_vmoff;
|
|
exit_handlers[EXIT_REASON_VMON] = handle_vmon;
|
|
exit_handlers[EXIT_REASON_INVEPT] = handle_invept;
|
|
exit_handlers[EXIT_REASON_INVVPID] = handle_invvpid;
|
|
exit_handlers[EXIT_REASON_VMFUNC] = handle_vmfunc;
|
|
|
|
return 0;
|
|
}
|
|
|
|
struct kvm_x86_nested_ops vmx_nested_ops = {
|
|
.leave_nested = vmx_leave_nested,
|
|
.check_events = vmx_check_nested_events,
|
|
.hv_timer_pending = nested_vmx_preemption_timer_pending,
|
|
.triple_fault = nested_vmx_triple_fault,
|
|
.get_state = vmx_get_nested_state,
|
|
.set_state = vmx_set_nested_state,
|
|
.get_nested_state_pages = vmx_get_nested_state_pages,
|
|
.write_log_dirty = nested_vmx_write_pml_buffer,
|
|
.enable_evmcs = nested_enable_evmcs,
|
|
.get_evmcs_version = nested_get_evmcs_version,
|
|
};
|